diff --git a/.gitattributes b/.gitattributes
index c0bff6e189..7c648c0774 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -80,8 +80,11 @@
*.pvr binary
*.snk binary
*.tga binary
+*.tif binary
+*.tiff binary
*.ttc binary
*.ttf binary
+*.wbmp binary
*.webp binary
*.woff binary
*.woff2 binary
diff --git a/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs b/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs
deleted file mode 100644
index f265bdd517..0000000000
--- a/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs
+++ /dev/null
@@ -1,279 +0,0 @@
-// Copyright (c) Six Labors.
-// Licensed under the Apache License, Version 2.0.
-
-using System;
-using System.Numerics;
-using System.Runtime.CompilerServices;
-using SixLabors.ImageSharp.Memory;
-using SixLabors.ImageSharp.PixelFormats;
-
-namespace SixLabors.ImageSharp
-{
- ///
- /// Extension methods for .
- /// TODO: One day rewrite all this to use SIMD intrinsics. There's a lot of scope for improvement.
- ///
- internal static class DenseMatrixUtils
- {
- ///
- /// Computes the sum of vectors in the span referenced by weighted by the two kernel weight values.
- /// Using this method the convolution filter is not applied to alpha in addition to the color channels.
- ///
- /// The pixel format.
- /// The vertical dense matrix.
- /// The horizontal dense matrix.
- /// The source frame.
- /// The target row base reference.
- /// The current row.
- /// The current column.
- /// The minimum working area row.
- /// The maximum working area row.
- /// The minimum working area column.
- /// The maximum working area column.
- [MethodImpl(InliningOptions.ShortMethod)]
- public static void Convolve2D3(
- in DenseMatrix matrixY,
- in DenseMatrix matrixX,
- Buffer2D sourcePixels,
- ref Vector4 targetRowRef,
- int row,
- int column,
- int minRow,
- int maxRow,
- int minColumn,
- int maxColumn)
- where TPixel : unmanaged, IPixel
- {
- Convolve2DImpl(
- in matrixY,
- in matrixX,
- sourcePixels,
- row,
- column,
- minRow,
- maxRow,
- minColumn,
- maxColumn,
- out Vector4 vector);
-
- ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column);
- vector.W = target.W;
-
- Numerics.UnPremultiply(ref vector);
- target = vector;
- }
-
- ///
- /// Computes the sum of vectors in the span referenced by weighted by the two kernel weight values.
- /// Using this method the convolution filter is applied to alpha in addition to the color channels.
- ///
- /// The pixel format.
- /// The vertical dense matrix.
- /// The horizontal dense matrix.
- /// The source frame.
- /// The target row base reference.
- /// The current row.
- /// The current column.
- /// The minimum working area row.
- /// The maximum working area row.
- /// The minimum working area column.
- /// The maximum working area column.
- [MethodImpl(InliningOptions.ShortMethod)]
- public static void Convolve2D4(
- in DenseMatrix matrixY,
- in DenseMatrix matrixX,
- Buffer2D sourcePixels,
- ref Vector4 targetRowRef,
- int row,
- int column,
- int minRow,
- int maxRow,
- int minColumn,
- int maxColumn)
- where TPixel : unmanaged, IPixel
- {
- Convolve2DImpl(
- in matrixY,
- in matrixX,
- sourcePixels,
- row,
- column,
- minRow,
- maxRow,
- minColumn,
- maxColumn,
- out Vector4 vector);
-
- ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column);
- Numerics.UnPremultiply(ref vector);
- target = vector;
- }
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public static void Convolve2DImpl(
- in DenseMatrix matrixY,
- in DenseMatrix matrixX,
- Buffer2D sourcePixels,
- int row,
- int column,
- int minRow,
- int maxRow,
- int minColumn,
- int maxColumn,
- out Vector4 vector)
- where TPixel : unmanaged, IPixel
- {
- Vector4 vectorY = default;
- Vector4 vectorX = default;
- int matrixHeight = matrixY.Rows;
- int matrixWidth = matrixY.Columns;
- int radiusY = matrixHeight >> 1;
- int radiusX = matrixWidth >> 1;
- int sourceOffsetColumnBase = column + minColumn;
-
- for (int y = 0; y < matrixHeight; y++)
- {
- int offsetY = Numerics.Clamp(row + y - radiusY, minRow, maxRow);
- Span sourceRowSpan = sourcePixels.GetRowSpan(offsetY);
-
- for (int x = 0; x < matrixWidth; x++)
- {
- int offsetX = Numerics.Clamp(sourceOffsetColumnBase + x - radiusX, minColumn, maxColumn);
- var currentColor = sourceRowSpan[offsetX].ToVector4();
- Numerics.Premultiply(ref currentColor);
-
- vectorX += matrixX[y, x] * currentColor;
- vectorY += matrixY[y, x] * currentColor;
- }
- }
-
- vector = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY));
- }
-
- ///
- /// Computes the sum of vectors in the span referenced by weighted by the kernel weight values.
- /// Using this method the convolution filter is not applied to alpha in addition to the color channels.
- ///
- /// The pixel format.
- /// The dense matrix.
- /// The source frame.
- /// The target row base reference.
- /// The current row.
- /// The current column.
- /// The minimum working area row.
- /// The maximum working area row.
- /// The minimum working area column.
- /// The maximum working area column.
- [MethodImpl(InliningOptions.ShortMethod)]
- public static void Convolve3(
- in DenseMatrix matrix,
- Buffer2D sourcePixels,
- ref Vector4 targetRowRef,
- int row,
- int column,
- int minRow,
- int maxRow,
- int minColumn,
- int maxColumn)
- where TPixel : unmanaged, IPixel
- {
- Vector4 vector = default;
-
- ConvolveImpl(
- in matrix,
- sourcePixels,
- row,
- column,
- minRow,
- maxRow,
- minColumn,
- maxColumn,
- ref vector);
-
- ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column);
- vector.W = target.W;
-
- Numerics.UnPremultiply(ref vector);
- target = vector;
- }
-
- ///
- /// Computes the sum of vectors in the span referenced by weighted by the kernel weight values.
- /// Using this method the convolution filter is applied to alpha in addition to the color channels.
- ///
- /// The pixel format.
- /// The dense matrix.
- /// The source frame.
- /// The target row base reference.
- /// The current row.
- /// The current column.
- /// The minimum working area row.
- /// The maximum working area row.
- /// The minimum working area column.
- /// The maximum working area column.
- [MethodImpl(InliningOptions.ShortMethod)]
- public static void Convolve4(
- in DenseMatrix matrix,
- Buffer2D sourcePixels,
- ref Vector4 targetRowRef,
- int row,
- int column,
- int minRow,
- int maxRow,
- int minColumn,
- int maxColumn)
- where TPixel : unmanaged, IPixel
- {
- Vector4 vector = default;
-
- ConvolveImpl(
- in matrix,
- sourcePixels,
- row,
- column,
- minRow,
- maxRow,
- minColumn,
- maxColumn,
- ref vector);
-
- ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column);
- Numerics.UnPremultiply(ref vector);
- target = vector;
- }
-
- [MethodImpl(InliningOptions.ShortMethod)]
- private static void ConvolveImpl(
- in DenseMatrix matrix,
- Buffer2D sourcePixels,
- int row,
- int column,
- int minRow,
- int maxRow,
- int minColumn,
- int maxColumn,
- ref Vector4 vector)
- where TPixel : unmanaged, IPixel
- {
- int matrixHeight = matrix.Rows;
- int matrixWidth = matrix.Columns;
- int radiusY = matrixHeight >> 1;
- int radiusX = matrixWidth >> 1;
- int sourceOffsetColumnBase = column + minColumn;
-
- for (int y = 0; y < matrixHeight; y++)
- {
- int offsetY = Numerics.Clamp(row + y - radiusY, minRow, maxRow);
- Span sourceRowSpan = sourcePixels.GetRowSpan(offsetY);
-
- for (int x = 0; x < matrixWidth; x++)
- {
- int offsetX = Numerics.Clamp(sourceOffsetColumnBase + x - radiusX, minColumn, maxColumn);
- var currentColor = sourceRowSpan[offsetX].ToVector4();
- Numerics.Premultiply(ref currentColor);
- vector += matrix[y, x] * currentColor;
- }
- }
- }
- }
-}
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
index b760301167..475d64bc4f 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -7,6 +7,7 @@ using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
+using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
{
@@ -22,6 +23,20 @@ namespace SixLabors.ImageSharp
private static ReadOnlySpan ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 };
+ private static ReadOnlySpan ShuffleMaskShiftAlpha => // Byte-shuffle control: the same 16-entry pattern repeated twice.
+ new byte[]
+ {
+ 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15, // bytes 0-2, 4-6, 8-10, 12-14 first; every fourth byte (3, 7, 11, 15) last
+ 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15
+ };
+
+ public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => // Read as eight 32-bit indices (low byte first): 0, 1, 2, 4, 5, 6, 3, 7.
+ new byte[]
+ {
+ 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0,
+ 5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0
+ };
+
///
/// Shuffle single-precision (32-bit) floating-point elements in
/// using the control and store the results in .
@@ -789,6 +804,138 @@ namespace SixLabors.ImageSharp
}
}
}
+
+ internal static void PackFromRgbPlanesAvx2Reduce(
+ ref ReadOnlySpan redChannel,
+ ref ReadOnlySpan greenChannel,
+ ref ReadOnlySpan blueChannel,
+ ref Span destination)
+ { // Packs whole vector-sized batches of planar R/G/B bytes into 3-byte pixels, then advances all four spans past them.
+ ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel));
+ ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel));
+ ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel));
+ ref byte dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); // Destination is addressed byte-wise because output batches are 24 bytes apart.
+
+ int count = redChannel.Length / Vector256.Count; // Number of whole vectors; any leftover elements stay in the sliced spans for the caller.
+
+ ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
+ Vector256 control1 = Unsafe.As>(ref control1Bytes);
+
+ ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32);
+ Vector256 control2 = Unsafe.As>(ref control2Bytes);
+
+ Vector256 a = Vector256.Create((byte)255); // All-0xFF filler interleaved as the fourth byte of every pixel before it is shifted out.
+
+ Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha));
+
+ for (int i = 0; i < count; i++)
+ {
+ Vector256 r0 = Unsafe.Add(ref rBase, i);
+ Vector256 g0 = Unsafe.Add(ref gBase, i);
+ Vector256 b0 = Unsafe.Add(ref bBase, i);
+
+ r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte(); // Pre-permute 32-bit elements; presumably compensates for the per-128-bit-lane unpacks below (mask contents not visible here).
+ g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte();
+ b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte();
+
+ Vector256 rg = Avx2.UnpackLow(r0, g0); // Byte interleave: r,g,r,g,...
+ Vector256 b1 = Avx2.UnpackLow(b0, a); // Byte interleave: b,255,b,255,...
+
+ Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); // 16-bit interleave yields r,g,b,255 quads.
+ Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+
+ rg = Avx2.UnpackHigh(r0, g0);
+ b1 = Avx2.UnpackHigh(b0, a);
+
+ Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+ Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+
+ rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha); // Per 16-byte half: 12 colour bytes to the front, the four filler bytes to the end.
+ rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha);
+ rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha);
+ rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha);
+
+ rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte(); // Element order 0,1,2,4,5,6,3,7: colour dwords of both halves fill the low 24 bytes, filler dwords go last.
+ rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte();
+ rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte();
+ rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte();
+
+ ref byte d1 = ref Unsafe.Add(ref dBase, 24 * 4 * i); // Each iteration produces 4 x 24 = 96 output bytes.
+ ref byte d2 = ref Unsafe.Add(ref d1, 24);
+ ref byte d3 = ref Unsafe.Add(ref d2, 24);
+ ref byte d4 = ref Unsafe.Add(ref d3, 24);
+
+ Unsafe.As>(ref d1) = rgb1; // Whole-vector stores at a 24-byte stride: the filler tail of each store is overwritten by the next one.
+ Unsafe.As>(ref d2) = rgb2;
+ Unsafe.As>(ref d3) = rgb3;
+ Unsafe.As>(ref d4) = rgb4; // The last store runs past the 96-byte batch; callers guard for destination padding for this reason.
+ }
+
+ int slice = count * Vector256.Count; // Elements consumed by the loop above.
+ redChannel = redChannel.Slice(slice);
+ greenChannel = greenChannel.Slice(slice);
+ blueChannel = blueChannel.Slice(slice);
+ destination = destination.Slice(slice);
+ }
+
+ internal static void PackFromRgbPlanesAvx2Reduce(
+ ref ReadOnlySpan redChannel,
+ ref ReadOnlySpan greenChannel,
+ ref ReadOnlySpan blueChannel,
+ ref Span destination)
+ { // Packs whole vector-sized batches of planar R/G/B bytes into 4-byte pixels with the fourth byte set to 255, then advances all four spans.
+ ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel));
+ ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel));
+ ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel));
+ ref Vector256 dBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+
+ int count = redChannel.Length / Vector256.Count; // Number of whole vectors; any leftover elements stay in the sliced spans for the caller.
+
+ ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
+ Vector256 control1 = Unsafe.As>(ref control1Bytes);
+
+ ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32); // NOTE(review): control2 is never read in this overload.
+ Vector256 control2 = Unsafe.As>(ref control2Bytes);
+
+ Vector256 a = Vector256.Create((byte)255); // Fourth (alpha) byte written to every pixel.
+
+ Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); // NOTE(review): shuffleAlpha is never read in this overload.
+
+ for (int i = 0; i < count; i++)
+ {
+ Vector256 r0 = Unsafe.Add(ref rBase, i);
+ Vector256 g0 = Unsafe.Add(ref gBase, i);
+ Vector256 b0 = Unsafe.Add(ref bBase, i);
+
+ r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte(); // Pre-permute 32-bit elements; presumably compensates for the per-128-bit-lane unpacks below (mask contents not visible here).
+ g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte();
+ b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte();
+
+ Vector256 rg = Avx2.UnpackLow(r0, g0); // Byte interleave: r,g,r,g,...
+ Vector256 b1 = Avx2.UnpackLow(b0, a); // Byte interleave: b,255,b,255,...
+
+ Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); // 16-bit interleave yields r,g,b,255 quads.
+ Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+
+ rg = Avx2.UnpackHigh(r0, g0);
+ b1 = Avx2.UnpackHigh(b0, a);
+
+ Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+ Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+
+ ref Vector256 d0 = ref Unsafe.Add(ref dBase, i * 4); // Four consecutive output vectors per iteration.
+ d0 = rgb1;
+ Unsafe.Add(ref d0, 1) = rgb2;
+ Unsafe.Add(ref d0, 2) = rgb3;
+ Unsafe.Add(ref d0, 3) = rgb4;
+ }
+
+ int slice = count * Vector256.Count; // Elements consumed by the loop above.
+ redChannel = redChannel.Slice(slice);
+ greenChannel = greenChannel.Slice(slice);
+ blueChannel = blueChannel.Slice(slice);
+ destination = destination.Slice(slice);
+ }
}
}
}
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
new file mode 100644
index 0000000000..fe02bd0072
--- /dev/null
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
@@ -0,0 +1,206 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.PixelFormats;
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
+namespace SixLabors.ImageSharp
+{
+ internal static partial class SimdUtils
+ {
+ [MethodImpl(InliningOptions.ShortMethod)]
+ internal static void PackFromRgbPlanes(
+ Configuration configuration,
+ ReadOnlySpan redChannel,
+ ReadOnlySpan greenChannel,
+ ReadOnlySpan blueChannel,
+ Span destination)
+ { // Packs three equally sized channel planes into 'destination'; 'configuration' is not read here.
+ DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!");
+ DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!");
+ DebugGuard.IsTrue(destination.Length > redChannel.Length + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); // Needs at least 3 extra elements.
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported)
+ {
+ HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); // Consumes whole vectors and advances the spans.
+ }
+ else
+#endif
+ {
+ PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); // Consumes groups of 4 and advances the spans.
+ }
+
+ PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); // Handles whatever the batched pass left over.
+ }
+
+ [MethodImpl(InliningOptions.ShortMethod)]
+ internal static void PackFromRgbPlanes(
+ Configuration configuration,
+ ReadOnlySpan redChannel,
+ ReadOnlySpan greenChannel,
+ ReadOnlySpan blueChannel,
+ Span destination)
+ { // Packs three equally sized channel planes into 'destination'; 'configuration' is not read here.
+ DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!");
+ DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!");
+ DebugGuard.IsTrue(destination.Length >= redChannel.Length, nameof(destination), "'destination' span should not be shorter than the source channels!"); // Equal length is valid: this overload's packers write whole pixels only, so no padding is needed.
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported)
+ {
+ HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); // Consumes whole vectors and advances the spans.
+ }
+ else
+#endif
+ {
+ PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); // Consumes groups of 4 and advances the spans.
+ }
+
+ PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); // Handles whatever the batched pass left over.
+ }
+
+ private static void PackFromRgbPlanesScalarBatchedReduce(
+ ref ReadOnlySpan redChannel,
+ ref ReadOnlySpan greenChannel,
+ ref ReadOnlySpan blueChannel,
+ ref Span destination)
+ { // Non-SIMD path: packs groups of 4 samples per channel into 4 Rgb24 pixels, then advances all four spans past them.
+ ref ByteTuple4 r = ref Unsafe.As(ref MemoryMarshal.GetReference(redChannel)); // View each channel as 4-byte tuples.
+ ref ByteTuple4 g = ref Unsafe.As(ref MemoryMarshal.GetReference(greenChannel));
+ ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel));
+ ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination);
+
+ int count = redChannel.Length / 4; // Whole groups of 4; up to 3 trailing samples are left for the caller.
+ for (int i = 0; i < count; i++)
+ {
+ ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); // Four consecutive destination pixels.
+ ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1);
+ ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2);
+ ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3);
+
+ ref ByteTuple4 rr = ref Unsafe.Add(ref r, i); // The matching 4 samples of each channel.
+ ref ByteTuple4 gg = ref Unsafe.Add(ref g, i);
+ ref ByteTuple4 bb = ref Unsafe.Add(ref b, i);
+
+ d0.R = rr.V0;
+ d0.G = gg.V0;
+ d0.B = bb.V0;
+
+ d1.R = rr.V1;
+ d1.G = gg.V1;
+ d1.B = bb.V1;
+
+ d2.R = rr.V2;
+ d2.G = gg.V2;
+ d2.B = bb.V2;
+
+ d3.R = rr.V3;
+ d3.G = gg.V3;
+ d3.B = bb.V3;
+ }
+
+ int finished = count * 4; // Elements consumed by the loop above.
+ redChannel = redChannel.Slice(finished);
+ greenChannel = greenChannel.Slice(finished);
+ blueChannel = blueChannel.Slice(finished);
+ destination = destination.Slice(finished);
+ }
+
+ private static void PackFromRgbPlanesScalarBatchedReduce(
+ ref ReadOnlySpan redChannel,
+ ref ReadOnlySpan greenChannel,
+ ref ReadOnlySpan blueChannel,
+ ref Span destination)
+ { // Non-SIMD path: packs groups of 4 samples per channel into 4 Rgba32 pixels, then advances all four spans past them.
+ ref ByteTuple4 r = ref Unsafe.As(ref MemoryMarshal.GetReference(redChannel)); // View each channel as 4-byte tuples.
+ ref ByteTuple4 g = ref Unsafe.As(ref MemoryMarshal.GetReference(greenChannel));
+ ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel));
+ ref Rgba32 rgb = ref MemoryMarshal.GetReference(destination);
+
+ int count = redChannel.Length / 4; // Whole groups of 4; up to 3 trailing samples are left for the caller.
+ destination.Fill(new Rgba32(0, 0, 0, 255)); // Pre-fills the entire destination span with opaque black so the loop only has to write R, G and B.
+ for (int i = 0; i < count; i++)
+ {
+ ref Rgba32 d0 = ref Unsafe.Add(ref rgb, i * 4); // Four consecutive destination pixels.
+ ref Rgba32 d1 = ref Unsafe.Add(ref d0, 1);
+ ref Rgba32 d2 = ref Unsafe.Add(ref d0, 2);
+ ref Rgba32 d3 = ref Unsafe.Add(ref d0, 3);
+
+ ref ByteTuple4 rr = ref Unsafe.Add(ref r, i); // The matching 4 samples of each channel.
+ ref ByteTuple4 gg = ref Unsafe.Add(ref g, i);
+ ref ByteTuple4 bb = ref Unsafe.Add(ref b, i);
+
+ d0.R = rr.V0;
+ d0.G = gg.V0;
+ d0.B = bb.V0;
+
+ d1.R = rr.V1;
+ d1.G = gg.V1;
+ d1.B = bb.V1;
+
+ d2.R = rr.V2;
+ d2.G = gg.V2;
+ d2.B = bb.V2;
+
+ d3.R = rr.V3;
+ d3.G = gg.V3;
+ d3.B = bb.V3;
+ }
+
+ int finished = count * 4; // Elements consumed by the loop above.
+ redChannel = redChannel.Slice(finished);
+ greenChannel = greenChannel.Slice(finished);
+ blueChannel = blueChannel.Slice(finished);
+ destination = destination.Slice(finished);
+ }
+
+ private static void PackFromRgbPlanesRemainder(
+ ReadOnlySpan redChannel,
+ ReadOnlySpan greenChannel,
+ ReadOnlySpan blueChannel,
+ Span destination)
+ { // Element-by-element copy of the samples the batched packers left over.
+ ref byte r = ref MemoryMarshal.GetReference(redChannel);
+ ref byte g = ref MemoryMarshal.GetReference(greenChannel);
+ ref byte b = ref MemoryMarshal.GetReference(blueChannel);
+ ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination);
+
+ for (int i = 0; i < redChannel.Length; i++) // Bounded by the source: 'destination' carries padding, so iterating its length would read past the end of the channels.
+ {
+ ref Rgb24 d = ref Unsafe.Add(ref rgb, i);
+ d.R = Unsafe.Add(ref r, i);
+ d.G = Unsafe.Add(ref g, i);
+ d.B = Unsafe.Add(ref b, i);
+ }
+ }
+
+ private static void PackFromRgbPlanesRemainder(
+ ReadOnlySpan redChannel,
+ ReadOnlySpan greenChannel,
+ ReadOnlySpan blueChannel,
+ Span destination)
+ { // Element-by-element copy of the samples the batched packers left over; alpha is set to 255.
+ ref byte r = ref MemoryMarshal.GetReference(redChannel);
+ ref byte g = ref MemoryMarshal.GetReference(greenChannel);
+ ref byte b = ref MemoryMarshal.GetReference(blueChannel);
+ ref Rgba32 rgba = ref MemoryMarshal.GetReference(destination);
+
+ for (int i = 0; i < redChannel.Length; i++) // Bounded by the source: 'destination' may be longer than the channels, so iterating its length would read past their end.
+ {
+ ref Rgba32 d = ref Unsafe.Add(ref rgba, i);
+ d.R = Unsafe.Add(ref r, i);
+ d.G = Unsafe.Add(ref g, i);
+ d.B = Unsafe.Add(ref b, i);
+ d.A = 255;
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs
index aaf6d405cf..6d82cfad01 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs
@@ -6,6 +6,7 @@ using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.PixelFormats;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
@@ -220,5 +221,13 @@ namespace SixLabors.ImageSharp
nameof(source),
$"length should be divisible by {shouldBeDivisibleBy}!");
}
+
+ private struct ByteTuple4 // Four consecutive bytes as one value; the scalar batched packers reinterpret channel data as this type.
+ {
+ public byte V0;
+ public byte V1;
+ public byte V2;
+ public byte V3;
+ }
}
}
diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj
index 1d7fb2958b..a90aaf715a 100644
--- a/src/ImageSharp/ImageSharp.csproj
+++ b/src/ImageSharp/ImageSharp.csproj
@@ -24,16 +24,16 @@
-
+
-
+
-
+
-
-
+
+
diff --git a/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs b/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs
index a7a51f77dd..0c35c88286 100644
--- a/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs
+++ b/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs
@@ -53,8 +53,13 @@ namespace SixLabors.ImageSharp.Memory
{
ThrowObjectDisposedException();
}
-
+#if SUPPORTS_CREATESPAN
+ ref byte r0 = ref MemoryMarshal.GetReference(this.Data);
+ return MemoryMarshal.CreateSpan(ref Unsafe.As(ref r0), this.length);
+#else
return MemoryMarshal.Cast(this.Data.AsSpan()).Slice(0, this.length);
+#endif
+
}
///
diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs
index 73b656f363..f345f58bcd 100644
--- a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs
+++ b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs
@@ -21,6 +21,23 @@ namespace SixLabors.ImageSharp.PixelFormats
///
public override PixelTypeInfo GetPixelTypeInfo() => LazyInfo.Value;
+
+ ///
+ internal override void PackFromRgbPlanes(
+ Configuration configuration,
+ ReadOnlySpan redChannel,
+ ReadOnlySpan greenChannel,
+ ReadOnlySpan blueChannel,
+ Span destination)
+ { // Validates the arguments, then delegates to the SimdUtils packer.
+ Guard.NotNull(configuration, nameof(configuration));
+ int count = redChannel.Length;
+ Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!");
+ Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!");
+ Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); // Needs at least 3 extra elements.
+
+ SimdUtils.PackFromRgbPlanes(configuration, redChannel, greenChannel, blueChannel, destination);
+ }
}
}
}
diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs
index d8322e37d4..9633059774 100644
--- a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs
+++ b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs
@@ -56,6 +56,23 @@ namespace SixLabors.ImageSharp.PixelFormats
MemoryMarshal.Cast(sourceVectors),
MemoryMarshal.Cast(destinationPixels));
}
+
+ ///
+ internal override void PackFromRgbPlanes(
+ Configuration configuration,
+ ReadOnlySpan redChannel,
+ ReadOnlySpan greenChannel,
+ ReadOnlySpan blueChannel,
+ Span destination)
+ { // Validates the arguments, then delegates to the SimdUtils packer.
+ Guard.NotNull(configuration, nameof(configuration));
+ int count = redChannel.Length;
+ Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!");
+ Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!");
+ Guard.IsTrue(destination.Length >= count, nameof(destination), "'destination' span should not be shorter than the source channels!"); // A destination of exactly 'count' pixels is valid, as the message states.
+
+ SimdUtils.PackFromRgbPlanes(configuration, redChannel, greenChannel, blueChannel, destination);
+ }
}
}
}
diff --git a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs
index dbe06702d9..c5450538e4 100644
--- a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs
+++ b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs
@@ -4,6 +4,8 @@
using System;
using System.Buffers;
using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Formats;
using SixLabors.ImageSharp.Memory;
@@ -159,5 +161,45 @@ namespace SixLabors.ImageSharp.PixelFormats
PixelOperations.Instance.From(configuration, sourcePixels, destinationPixels);
}
+
+ ///
+ /// Bulk operation that packs 3 separate RGB channels to .
+ /// The destination must have a padding of 3.
+ ///
+ /// A to configure internal operations.
+ /// A to the red values.
+ /// A to the green values.
+ /// A to the blue values.
+ /// A to the destination pixels.
+ internal virtual void PackFromRgbPlanes(
+ Configuration configuration,
+ ReadOnlySpan redChannel,
+ ReadOnlySpan greenChannel,
+ ReadOnlySpan blueChannel,
+ Span destination)
+ { // Generic fallback: converts one pixel at a time through an intermediate Rgb24 value.
+ Guard.NotNull(configuration, nameof(configuration));
+
+ int count = redChannel.Length;
+ Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!");
+ Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!");
+ Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); // Needs at least 3 extra elements, although the loop below writes only 'count' pixels.
+
+ Guard.DestinationShouldNotBeTooShort(redChannel, destination, nameof(destination)); // NOTE(review): looks redundant with the stricter length check above - confirm.
+
+ Rgb24 rgb24 = default; // Scratch pixel reused for every element.
+ ref byte r = ref MemoryMarshal.GetReference(redChannel);
+ ref byte g = ref MemoryMarshal.GetReference(greenChannel);
+ ref byte b = ref MemoryMarshal.GetReference(blueChannel);
+ ref TPixel d = ref MemoryMarshal.GetReference(destination);
+
+ for (int i = 0; i < count; i++)
+ {
+ rgb24.R = Unsafe.Add(ref r, i);
+ rgb24.G = Unsafe.Add(ref g, i);
+ rgb24.B = Unsafe.Add(ref b, i);
+ Unsafe.Add(ref d, i).FromRgb24(rgb24);
+ }
+ }
}
}
diff --git a/src/ImageSharp/Primitives/DenseMatrix{T}.cs b/src/ImageSharp/Primitives/DenseMatrix{T}.cs
index e312703368..60dadb617b 100644
--- a/src/ImageSharp/Primitives/DenseMatrix{T}.cs
+++ b/src/ImageSharp/Primitives/DenseMatrix{T}.cs
@@ -109,7 +109,7 @@ namespace SixLabors.ImageSharp
/// The at the specified position.
public ref T this[int row, int column]
{
- [MethodImpl(InliningOptions.ShortMethod)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
this.CheckCoordinates(row, column);
@@ -124,7 +124,7 @@ namespace SixLabors.ImageSharp
///
/// The representation on the source data.
///
- [MethodImpl(InliningOptions.ShortMethod)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static implicit operator DenseMatrix(T[,] data) => new DenseMatrix(data);
///
@@ -134,7 +134,7 @@ namespace SixLabors.ImageSharp
///
/// The representation on the source data.
///
- [MethodImpl(InliningOptions.ShortMethod)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
#pragma warning disable SA1008 // Opening parenthesis should be spaced correctly
public static implicit operator T[,](in DenseMatrix data)
#pragma warning restore SA1008 // Opening parenthesis should be spaced correctly
@@ -175,7 +175,7 @@ namespace SixLabors.ImageSharp
/// Transposes the rows and columns of the dense matrix.
///
/// The .
- [MethodImpl(InliningOptions.ShortMethod)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public DenseMatrix Transpose()
{
var result = new DenseMatrix(this.Rows, this.Columns);
@@ -196,13 +196,13 @@ namespace SixLabors.ImageSharp
/// Fills the matrix with the given value
///
/// The value to fill each item with
- [MethodImpl(InliningOptions.ShortMethod)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Fill(T value) => this.Span.Fill(value);
///
/// Clears the matrix setting each value to the default value for the element type
///
- [MethodImpl(InliningOptions.ShortMethod)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Clear() => this.Span.Clear();
///
@@ -232,14 +232,14 @@ namespace SixLabors.ImageSharp
=> obj is DenseMatrix other && this.Equals(other);
///
- [MethodImpl(InliningOptions.ShortMethod)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool Equals(DenseMatrix other) =>
this.Columns == other.Columns
&& this.Rows == other.Rows
&& this.Span.SequenceEqual(other.Span);
///
- [MethodImpl(InliningOptions.ShortMethod)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public override int GetHashCode()
{
HashCode code = default;
diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs
index 3a5f35cd14..bb559019b7 100644
--- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs
+++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs
@@ -1,10 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
-using System;
using System.Numerics;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
@@ -43,12 +40,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
}
///
- /// Gets the horizontal gradient operator.
+ /// Gets the horizontal convolution kernel.
///
public DenseMatrix KernelX { get; }
///
- /// Gets the vertical gradient operator.
+ /// Gets the vertical convolution kernel.
///
public DenseMatrix KernelY { get; }
@@ -60,102 +57,39 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
///
protected override void OnFrameApply(ImageFrame source)
{
- using Buffer2D targetPixels = this.Configuration.MemoryAllocator.Allocate2D(source.Width, source.Height);
+ MemoryAllocator allocator = this.Configuration.MemoryAllocator;
+ using Buffer2D targetPixels = allocator.Allocate2D(source.Width, source.Height);
source.CopyTo(targetPixels);
var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds());
- var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, this.KernelY, this.KernelX, this.Configuration, this.PreserveAlpha);
- ParallelRowIterator.IterateRows(
- this.Configuration,
- interest,
- in operation);
+ // We use a rectangle 3x the interest width to allocate a buffer big enough
+ // for the source row plus the separate Y and X target accumulation rows.
+ var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 3, interest.Height);
- Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels);
- }
-
- ///
- /// A implementing the convolution logic for .
- ///
- private readonly struct RowOperation : IRowOperation
- {
- private readonly Rectangle bounds;
- private readonly int maxY;
- private readonly int maxX;
- private readonly Buffer2D targetPixels;
- private readonly Buffer2D sourcePixels;
- private readonly DenseMatrix kernelY;
- private readonly DenseMatrix kernelX;
- private readonly Configuration configuration;
- private readonly bool preserveAlpha;
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public RowOperation(
- Rectangle bounds,
- Buffer2D targetPixels,
- Buffer2D sourcePixels,
- DenseMatrix kernelY,
- DenseMatrix kernelX,
- Configuration configuration,
- bool preserveAlpha)
- {
- this.bounds = bounds;
- this.maxY = this.bounds.Bottom - 1;
- this.maxX = this.bounds.Right - 1;
- this.targetPixels = targetPixels;
- this.sourcePixels = sourcePixels;
- this.kernelY = kernelY;
- this.kernelX = kernelX;
- this.configuration = configuration;
- this.preserveAlpha = preserveAlpha;
- }
-
- ///
- [MethodImpl(InliningOptions.ShortMethod)]
- public void Invoke(int y, Span span)
+ using (var map = new KernelSamplingMap(allocator))
{
- ref Vector4 spanRef = ref MemoryMarshal.GetReference(span);
- Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X);
- PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span);
+ // Since the kernel sizes are identical we can use a single map.
+ map.BuildSamplingOffsetMap(this.KernelY, interest);
- if (this.preserveAlpha)
- {
- for (int x = 0; x < this.bounds.Width; x++)
- {
- DenseMatrixUtils.Convolve2D3(
- in this.kernelY,
- in this.kernelX,
- this.sourcePixels,
- ref spanRef,
- y,
- x,
- this.bounds.Y,
- this.maxY,
- this.bounds.X,
- this.maxX);
- }
- }
- else
- {
- for (int x = 0; x < this.bounds.Width; x++)
- {
- DenseMatrixUtils.Convolve2D4(
- in this.kernelY,
- in this.kernelX,
- this.sourcePixels,
- ref spanRef,
- y,
- x,
- this.bounds.Y,
- this.maxY,
- this.bounds.X,
- this.maxX);
- }
- }
+ var operation = new Convolution2DRowOperation(
+ interest,
+ targetPixels,
+ source.PixelBuffer,
+ map,
+ this.KernelY,
+ this.KernelX,
+ this.Configuration,
+ this.PreserveAlpha);
- PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan);
+ ParallelRowIterator.IterateRows, Vector4>(
+ this.Configuration,
+ operationBounds,
+ in operation);
}
+
+ Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels);
}
}
}
diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs
new file mode 100644
index 0000000000..802d1809f2
--- /dev/null
+++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs
@@ -0,0 +1,193 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.Advanced;
+using SixLabors.ImageSharp.Memory;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Processing.Processors.Convolution
+{
+ /// <summary>
+ /// A <see langword="struct"/> implementing the logic for 2D convolution.
+ /// </summary>
+ internal readonly struct Convolution2DRowOperation : IRowOperation
+ where TPixel : unmanaged, IPixel
+ {
+ private readonly Rectangle bounds;
+ private readonly Buffer2D targetPixels;
+ private readonly Buffer2D sourcePixels;
+ private readonly KernelSamplingMap map;
+ private readonly DenseMatrix kernelMatrixY;
+ private readonly DenseMatrix kernelMatrixX;
+ private readonly Configuration configuration;
+ private readonly bool preserveAlpha;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public Convolution2DRowOperation(
+ Rectangle bounds,
+ Buffer2D targetPixels,
+ Buffer2D sourcePixels,
+ KernelSamplingMap map,
+ DenseMatrix kernelMatrixY,
+ DenseMatrix kernelMatrixX,
+ Configuration configuration,
+ bool preserveAlpha)
+ {
+ this.bounds = bounds;
+ this.targetPixels = targetPixels;
+ this.sourcePixels = sourcePixels;
+ this.map = map;
+ this.kernelMatrixY = kernelMatrixY;
+ this.kernelMatrixX = kernelMatrixX;
+ this.configuration = configuration;
+ this.preserveAlpha = preserveAlpha;
+ }
+
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void Invoke(int y, Span span)
+ {
+ if (this.preserveAlpha)
+ {
+ this.Convolve3(y, span);
+ }
+ else
+ {
+ this.Convolve4(y, span);
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private void Convolve3(int y, Span span)
+ {
+ // Span is 3x bounds.
+ int boundsX = this.bounds.X;
+ int boundsWidth = this.bounds.Width;
+ Span sourceBuffer = span.Slice(0, boundsWidth);
+ Span targetYBuffer = span.Slice(boundsWidth, boundsWidth);
+ Span targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth);
+
+ var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map);
+ ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);
+
+ // Clear the target buffers for each row run.
+ targetYBuffer.Clear();
+ targetXBuffer.Clear();
+ ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer);
+ ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer);
+
+ ReadOnlyKernel kernelY = state.KernelY;
+ ReadOnlyKernel kernelX = state.KernelX;
+ Span sourceRow;
+ for (int kY = 0; kY < kernelY.Rows; kY++)
+ {
+ // Get the precalculated source sample row for this kernel row and copy to our buffer.
+ int sampleY = Unsafe.Add(ref sampleRowBase, kY);
+ sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
+
+ ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
+
+ for (int x = 0; x < sourceBuffer.Length; x++)
+ {
+ ref int sampleColumnBase = ref state.GetSampleColumn(x);
+ ref Vector4 targetY = ref Unsafe.Add(ref targetBaseY, x);
+ ref Vector4 targetX = ref Unsafe.Add(ref targetBaseX, x);
+
+ for (int kX = 0; kX < kernelY.Columns; kX++)
+ {
+ int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
+ Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
+ targetY += kernelX[kY, kX] * sample;
+ targetX += kernelY[kY, kX] * sample;
+ }
+ }
+ }
+
+ // Now we need to combine the values and copy the original alpha values
+ // from the source row.
+ sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
+
+ for (int x = 0; x < sourceRow.Length; x++)
+ {
+ ref Vector4 target = ref Unsafe.Add(ref targetBaseY, x);
+ Vector4 vectorY = target;
+ Vector4 vectorX = Unsafe.Add(ref targetBaseX, x);
+
+ target = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY));
+ target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W;
+ }
+
+ Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRowSpan);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private void Convolve4(int y, Span span)
+ {
+ // Span is 3x bounds.
+ int boundsX = this.bounds.X;
+ int boundsWidth = this.bounds.Width;
+ Span sourceBuffer = span.Slice(0, boundsWidth);
+ Span targetYBuffer = span.Slice(boundsWidth, boundsWidth);
+ Span targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth);
+
+ var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map);
+ ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);
+
+ // Clear the target buffers for each row run.
+ targetYBuffer.Clear();
+ targetXBuffer.Clear();
+ ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer);
+ ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer);
+
+ ReadOnlyKernel kernelY = state.KernelY;
+ ReadOnlyKernel kernelX = state.KernelX;
+ for (int kY = 0; kY < kernelY.Rows; kY++)
+ {
+ // Get the precalculated source sample row for this kernel row and copy to our buffer.
+ int sampleY = Unsafe.Add(ref sampleRowBase, kY);
+ Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
+
+ Numerics.Premultiply(sourceBuffer);
+ ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
+
+ for (int x = 0; x < sourceBuffer.Length; x++)
+ {
+ ref int sampleColumnBase = ref state.GetSampleColumn(x);
+ ref Vector4 targetY = ref Unsafe.Add(ref targetBaseY, x);
+ ref Vector4 targetX = ref Unsafe.Add(ref targetBaseX, x);
+
+ for (int kX = 0; kX < kernelY.Columns; kX++)
+ {
+ int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
+ Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
+ targetY += kernelX[kY, kX] * sample;
+ targetX += kernelY[kY, kX] * sample;
+ }
+ }
+ }
+
+ // Now we need to combine the values
+ for (int x = 0; x < targetYBuffer.Length; x++)
+ {
+ ref Vector4 target = ref Unsafe.Add(ref targetBaseY, x);
+ Vector4 vectorY = target;
+ Vector4 vectorX = Unsafe.Add(ref targetBaseX, x);
+
+ target = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY));
+ }
+
+ Numerics.UnPremultiply(targetYBuffer);
+
+ Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRow);
+ }
+ }
+}
diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs
new file mode 100644
index 0000000000..218093ac4e
--- /dev/null
+++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs
@@ -0,0 +1,54 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp.Processing.Processors.Convolution
+{
+ /// <summary>
+ /// A stack-only struct used for reducing reference indirection during 2D convolution operations.
+ /// </summary>
+ internal readonly ref struct Convolution2DState
+ {
+ private readonly Span rowOffsetMap;
+ private readonly Span columnOffsetMap;
+ private readonly int kernelHeight;
+ private readonly int kernelWidth;
+
+ public Convolution2DState(
+ in DenseMatrix kernelY,
+ in DenseMatrix kernelX,
+ KernelSamplingMap map)
+ {
+ // We check the kernels are the same size upstream.
+ this.KernelY = new ReadOnlyKernel(kernelY);
+ this.KernelX = new ReadOnlyKernel(kernelX);
+ this.kernelHeight = kernelY.Rows;
+ this.kernelWidth = kernelY.Columns;
+ this.rowOffsetMap = map.GetRowOffsetSpan();
+ this.columnOffsetMap = map.GetColumnOffsetSpan();
+ }
+
+ public readonly ReadOnlyKernel KernelY
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get;
+ }
+
+ public readonly ReadOnlyKernel KernelX
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public readonly ref int GetSampleRow(int row)
+ => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), row * this.kernelHeight);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public readonly ref int GetSampleColumn(int column)
+ => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), column * this.kernelWidth);
+ }
+}
diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs
index b61690415a..151b0ffccc 100644
--- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs
+++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs
@@ -42,12 +42,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
}
///
- /// Gets the horizontal gradient operator.
+ /// Gets the horizontal convolution kernel.
///
public DenseMatrix KernelX { get; }
///
- /// Gets the vertical gradient operator.
+ /// Gets the vertical convolution kernel.
///
public DenseMatrix KernelY { get; }
@@ -63,96 +63,48 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds());
- // Horizontal convolution
- var horizontalOperation = new RowOperation(interest, firstPassPixels, source.PixelBuffer, this.KernelX, this.Configuration, this.PreserveAlpha);
- ParallelRowIterator.IterateRows(
- this.Configuration,
- interest,
- in horizontalOperation);
+ // We use a rectangle 2x the interest width to allocate a buffer big enough
+ // for source and target bulk pixel conversion.
+ var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height);
- // Vertical convolution
- var verticalOperation = new RowOperation(interest, source.PixelBuffer, firstPassPixels, this.KernelY, this.Configuration, this.PreserveAlpha);
- ParallelRowIterator.IterateRows(
- this.Configuration,
- interest,
- in verticalOperation);
- }
-
- ///
- /// A implementing the convolution logic for .
- ///
- private readonly struct RowOperation : IRowOperation
- {
- private readonly Rectangle bounds;
- private readonly Buffer2D targetPixels;
- private readonly Buffer2D sourcePixels;
- private readonly DenseMatrix kernel;
- private readonly Configuration configuration;
- private readonly bool preserveAlpha;
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public RowOperation(
- Rectangle bounds,
- Buffer2D targetPixels,
- Buffer2D sourcePixels,
- DenseMatrix kernel,
- Configuration configuration,
- bool preserveAlpha)
+ using (var mapX = new KernelSamplingMap(this.Configuration.MemoryAllocator))
{
- this.bounds = bounds;
- this.targetPixels = targetPixels;
- this.sourcePixels = sourcePixels;
- this.kernel = kernel;
- this.configuration = configuration;
- this.preserveAlpha = preserveAlpha;
+ mapX.BuildSamplingOffsetMap(this.KernelX, interest);
+
+ // Horizontal convolution
+ var horizontalOperation = new ConvolutionRowOperation(
+ interest,
+ firstPassPixels,
+ source.PixelBuffer,
+ mapX,
+ this.KernelX,
+ this.Configuration,
+ this.PreserveAlpha);
+
+ ParallelRowIterator.IterateRows, Vector4>(
+ this.Configuration,
+ operationBounds,
+ in horizontalOperation);
}
- ///
- [MethodImpl(InliningOptions.ShortMethod)]
- public void Invoke(int y, Span span)
+ using (var mapY = new KernelSamplingMap(this.Configuration.MemoryAllocator))
{
- ref Vector4 spanRef = ref MemoryMarshal.GetReference(span);
-
- int maxY = this.bounds.Bottom - 1;
- int maxX = this.bounds.Right - 1;
-
- Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X);
- PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span);
-
- if (this.preserveAlpha)
- {
- for (int x = 0; x < this.bounds.Width; x++)
- {
- DenseMatrixUtils.Convolve3(
- in this.kernel,
- this.sourcePixels,
- ref spanRef,
- y,
- x,
- this.bounds.Y,
- maxY,
- this.bounds.X,
- maxX);
- }
- }
- else
- {
- for (int x = 0; x < this.bounds.Width; x++)
- {
- DenseMatrixUtils.Convolve4(
- in this.kernel,
- this.sourcePixels,
- ref spanRef,
- y,
- x,
- this.bounds.Y,
- maxY,
- this.bounds.X,
- maxX);
- }
- }
-
- PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan);
+ mapY.BuildSamplingOffsetMap(this.KernelY, interest);
+
+ // Vertical convolution
+ var verticalOperation = new ConvolutionRowOperation(
+ interest,
+ source.PixelBuffer,
+ firstPassPixels,
+ mapY,
+ this.KernelY,
+ this.Configuration,
+ this.PreserveAlpha);
+
+ ParallelRowIterator.IterateRows, Vector4>(
+ this.Configuration,
+ operationBounds,
+ in verticalOperation);
}
}
}
diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs
index 95fef15f62..924a1125bd 100644
--- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs
+++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs
@@ -39,7 +39,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
}
///
- /// Gets the 2d gradient operator.
+ /// Gets the 2d convolution kernel.
///
public DenseMatrix KernelXY { get; }
@@ -51,16 +51,26 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
///
protected override void OnFrameApply(ImageFrame source)
{
- using Buffer2D targetPixels = this.Configuration.MemoryAllocator.Allocate2D(source.Size());
+ MemoryAllocator allocator = this.Configuration.MemoryAllocator;
+ using Buffer2D targetPixels = allocator.Allocate2D(source.Size());
source.CopyTo(targetPixels);
var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds());
- var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, this.KernelXY, this.Configuration, this.PreserveAlpha);
- ParallelRowIterator.IterateRows(
- this.Configuration,
- interest,
- in operation);
+
+ // We use a rectangle 2x the interest width to allocate a buffer big enough
+ // for source and target bulk pixel conversion.
+ var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height);
+ using (var map = new KernelSamplingMap(allocator))
+ {
+ map.BuildSamplingOffsetMap(this.KernelXY, interest);
+
+ var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, map, this.KernelXY, this.Configuration, this.PreserveAlpha);
+ ParallelRowIterator.IterateRows(
+ this.Configuration,
+ operationBounds,
+ in operation);
+ }
Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels);
}
@@ -71,10 +81,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
private readonly struct RowOperation : IRowOperation
{
private readonly Rectangle bounds;
- private readonly int maxY;
- private readonly int maxX;
private readonly Buffer2D targetPixels;
private readonly Buffer2D sourcePixels;
+ private readonly KernelSamplingMap map;
private readonly DenseMatrix kernel;
private readonly Configuration configuration;
private readonly bool preserveAlpha;
@@ -84,15 +93,15 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
Rectangle bounds,
Buffer2D targetPixels,
Buffer2D sourcePixels,
+ KernelSamplingMap map,
DenseMatrix kernel,
Configuration configuration,
bool preserveAlpha)
{
this.bounds = bounds;
- this.maxY = this.bounds.Bottom - 1;
- this.maxX = this.bounds.Right - 1;
this.targetPixels = targetPixels;
this.sourcePixels = sourcePixels;
+ this.map = map;
this.kernel = kernel;
this.configuration = configuration;
this.preserveAlpha = preserveAlpha;
@@ -102,45 +111,93 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
[MethodImpl(InliningOptions.ShortMethod)]
public void Invoke(int y, Span span)
{
- ref Vector4 spanRef = ref MemoryMarshal.GetReference(span);
+ // Span is 2x bounds.
+ int boundsX = this.bounds.X;
+ int boundsWidth = this.bounds.Width;
+ Span sourceBuffer = span.Slice(0, this.bounds.Width);
+ Span targetBuffer = span.Slice(this.bounds.Width);
+
+ ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span);
+ Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
- Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X);
- PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span);
+ var state = new ConvolutionState(in this.kernel, this.map);
+ int row = y - this.bounds.Y;
+ ref int sampleRowBase = ref state.GetSampleRow(row);
if (this.preserveAlpha)
{
- for (int x = 0; x < this.bounds.Width; x++)
+ // Clear the target buffer for each row run.
+ targetBuffer.Clear();
+ ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);
+
+ Span sourceRow;
+ for (int kY = 0; kY < state.Kernel.Rows; kY++)
+ {
+ // Get the precalculated source sample row for this kernel row and copy to our buffer.
+ int offsetY = Unsafe.Add(ref sampleRowBase, kY);
+ sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
+
+ ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
+
+ for (int x = 0; x < sourceBuffer.Length; x++)
+ {
+ ref int sampleColumnBase = ref state.GetSampleColumn(x);
+ ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
+
+ for (int kX = 0; kX < state.Kernel.Columns; kX++)
+ {
+ int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
+ Vector4 sample = Unsafe.Add(ref sourceBase, offsetX);
+ target += state.Kernel[kY, kX] * sample;
+ }
+ }
+ }
+
+ // Now we need to copy the original alpha values from the source row.
+ sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
+
+ for (int x = 0; x < sourceRow.Length; x++)
{
- DenseMatrixUtils.Convolve3(
- in this.kernel,
- this.sourcePixels,
- ref spanRef,
- y,
- x,
- this.bounds.Y,
- this.maxY,
- this.bounds.X,
- this.maxX);
+ ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
+ target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W;
}
}
else
{
- for (int x = 0; x < this.bounds.Width; x++)
+ // Clear the target buffer for each row run.
+ targetBuffer.Clear();
+ ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);
+
+ for (int kY = 0; kY < state.Kernel.Rows; kY++)
{
- DenseMatrixUtils.Convolve4(
- in this.kernel,
- this.sourcePixels,
- ref spanRef,
- y,
- x,
- this.bounds.Y,
- this.maxY,
- this.bounds.X,
- this.maxX);
+ // Get the precalculated source sample row for this kernel row and copy to our buffer.
+ int offsetY = Unsafe.Add(ref sampleRowBase, kY);
+ Span sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
+
+ Numerics.Premultiply(sourceBuffer);
+ ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
+
+ for (int x = 0; x < sourceBuffer.Length; x++)
+ {
+ ref int sampleColumnBase = ref state.GetSampleColumn(x);
+ ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
+
+ for (int kX = 0; kX < state.Kernel.Columns; kX++)
+ {
+ int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
+ Vector4 sample = Unsafe.Add(ref sourceBase, offsetX);
+ target += state.Kernel[kY, kX] * sample;
+ }
+ }
}
+
+ Numerics.UnPremultiply(targetBuffer);
}
- PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan);
+ PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRowSpan);
}
}
}
diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs
new file mode 100644
index 0000000000..9876b2885b
--- /dev/null
+++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs
@@ -0,0 +1,163 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.Advanced;
+using SixLabors.ImageSharp.Memory;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Processing.Processors.Convolution
+{
+ /// <summary>
+ /// A <see langword="struct"/> implementing the logic for 1D convolution.
+ /// </summary>
+ internal readonly struct ConvolutionRowOperation : IRowOperation
+ where TPixel : unmanaged, IPixel
+ {
+ private readonly Rectangle bounds;
+ private readonly Buffer2D targetPixels;
+ private readonly Buffer2D sourcePixels;
+ private readonly KernelSamplingMap map;
+ private readonly DenseMatrix kernelMatrix;
+ private readonly Configuration configuration;
+ private readonly bool preserveAlpha;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public ConvolutionRowOperation(
+ Rectangle bounds,
+ Buffer2D targetPixels,
+ Buffer2D sourcePixels,
+ KernelSamplingMap map,
+ DenseMatrix kernelMatrix,
+ Configuration configuration,
+ bool preserveAlpha)
+ {
+ this.bounds = bounds;
+ this.targetPixels = targetPixels;
+ this.sourcePixels = sourcePixels;
+ this.map = map;
+ this.kernelMatrix = kernelMatrix;
+ this.configuration = configuration;
+ this.preserveAlpha = preserveAlpha;
+ }
+
+ /// <inheritdoc/>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void Invoke(int y, Span span)
+ {
+ if (this.preserveAlpha)
+ {
+ this.Convolve3(y, span);
+ }
+ else
+ {
+ this.Convolve4(y, span);
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private void Convolve3(int y, Span span)
+ {
+ // Span is 2x bounds.
+ int boundsX = this.bounds.X;
+ int boundsWidth = this.bounds.Width;
+ Span sourceBuffer = span.Slice(0, this.bounds.Width);
+ Span targetBuffer = span.Slice(this.bounds.Width);
+
+ var state = new ConvolutionState(in this.kernelMatrix, this.map);
+ ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);
+
+ // Clear the target buffer for each row run.
+ targetBuffer.Clear();
+ ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);
+
+ ReadOnlyKernel kernel = state.Kernel;
+ Span sourceRow;
+ for (int kY = 0; kY < kernel.Rows; kY++)
+ {
+ // Get the precalculated source sample row for this kernel row and copy to our buffer.
+ int sampleY = Unsafe.Add(ref sampleRowBase, kY);
+ sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
+
+ ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
+
+ for (int x = 0; x < sourceBuffer.Length; x++)
+ {
+ ref int sampleColumnBase = ref state.GetSampleColumn(x);
+ ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
+
+ for (int kX = 0; kX < kernel.Columns; kX++)
+ {
+ int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
+ Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
+ target += kernel[kY, kX] * sample;
+ }
+ }
+ }
+
+ // Now we need to copy the original alpha values from the source row.
+ sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
+
+ for (int x = 0; x < sourceRow.Length; x++)
+ {
+ ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
+ target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W;
+ }
+
+ Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private void Convolve4(int y, Span span)
+ {
+ // Span is 2x bounds.
+ int boundsX = this.bounds.X;
+ int boundsWidth = this.bounds.Width;
+ Span sourceBuffer = span.Slice(0, this.bounds.Width);
+ Span targetBuffer = span.Slice(this.bounds.Width);
+
+ var state = new ConvolutionState(in this.kernelMatrix, this.map);
+ ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);
+
+ // Clear the target buffer for each row run.
+ targetBuffer.Clear();
+ ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);
+
+ ReadOnlyKernel kernel = state.Kernel;
+ for (int kY = 0; kY < kernel.Rows; kY++)
+ {
+ // Get the precalculated source sample row for this kernel row and copy to our buffer.
+ int sampleY = Unsafe.Add(ref sampleRowBase, kY);
+ Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
+
+ Numerics.Premultiply(sourceBuffer);
+ ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
+
+ for (int x = 0; x < sourceBuffer.Length; x++)
+ {
+ ref int sampleColumnBase = ref state.GetSampleColumn(x);
+ ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
+
+ for (int kX = 0; kX < kernel.Columns; kX++)
+ {
+ int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
+ Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
+ target += kernel[kY, kX] * sample;
+ }
+ }
+ }
+
+ Numerics.UnPremultiply(targetBuffer);
+
+ Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
+ PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow);
+ }
+ }
+}
diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs
new file mode 100644
index 0000000000..3f296c67df
--- /dev/null
+++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs
@@ -0,0 +1,45 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp.Processing.Processors.Convolution
+{
+ /// <summary>
+ /// A stack-only struct used for reducing reference indirection during convolution operations.
+ /// </summary>
+ internal readonly ref struct ConvolutionState
+ {
+ private readonly Span rowOffsetMap;
+ private readonly Span columnOffsetMap;
+ private readonly int kernelHeight;
+ private readonly int kernelWidth;
+
+ public ConvolutionState(
+ in DenseMatrix kernel,
+ KernelSamplingMap map)
+ {
+ this.Kernel = new ReadOnlyKernel(kernel);
+ this.kernelHeight = kernel.Rows;
+ this.kernelWidth = kernel.Columns;
+ this.rowOffsetMap = map.GetRowOffsetSpan();
+ this.columnOffsetMap = map.GetColumnOffsetSpan();
+ }
+
+ public readonly ReadOnlyKernel Kernel
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public readonly ref int GetSampleRow(int row)
+ => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), row * this.kernelHeight);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public readonly ref int GetSampleColumn(int column)
+ => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), column * this.kernelWidth);
+ }
+}
diff --git a/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs b/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs
new file mode 100644
index 0000000000..e4b7dbea09
--- /dev/null
+++ b/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs
@@ -0,0 +1,102 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Buffers;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.Memory;
+
+namespace SixLabors.ImageSharp.Processing.Processors.Convolution
+{
+    /// <summary>
+    /// Provides a map of the convolution kernel sampling offsets.
+    /// </summary>
+    internal sealed class KernelSamplingMap : IDisposable
+    {
+        private readonly MemoryAllocator allocator;
+        private bool isDisposed;
+        private IMemoryOwner<int> yOffsets;
+        private IMemoryOwner<int> xOffsets;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="KernelSamplingMap"/> class.
+        /// </summary>
+        /// <param name="allocator">The memory allocator.</param>
+        public KernelSamplingMap(MemoryAllocator allocator) => this.allocator = allocator;
+
+        /// <summary>
+        /// Builds a map of the sampling offsets for the kernel clamped by the given bounds.
+        /// </summary>
+        /// <remarks>
+        /// Calling this again allocates new buffers without disposing the previous ones.
+        /// </remarks>
+        /// <param name="kernel">The convolution kernel.</param>
+        /// <param name="bounds">The source bounds.</param>
+        public void BuildSamplingOffsetMap(DenseMatrix<float> kernel, Rectangle bounds)
+        {
+            int kernelHeight = kernel.Rows;
+            int kernelWidth = kernel.Columns;
+            this.yOffsets = this.allocator.Allocate<int>(bounds.Height * kernelHeight);
+            this.xOffsets = this.allocator.Allocate<int>(bounds.Width * kernelWidth);
+
+            BuildOffsets(this.yOffsets.GetSpan(), bounds.Height, kernelHeight, bounds.Y, bounds.Bottom - 1);
+            BuildOffsets(this.xOffsets.GetSpan(), bounds.Width, kernelWidth, bounds.X, bounds.Right - 1);
+        }
+
+        /// <summary>
+        /// Gets the row sampling offsets; each bounds row owns a run of kernel-height entries.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public Span<int> GetRowOffsetSpan() => this.yOffsets.GetSpan();
+
+        /// <summary>
+        /// Gets the column sampling offsets; each bounds column owns a run of kernel-width entries.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public Span<int> GetColumnOffsetSpan() => this.xOffsets.GetSpan();
+
+        /// <inheritdoc/>
+        public void Dispose()
+        {
+            if (!this.isDisposed)
+            {
+                // The buffers only exist once BuildSamplingOffsetMap has run, so they may still be null.
+                this.yOffsets?.Dispose();
+                this.xOffsets?.Dispose();
+
+                this.isDisposed = true;
+            }
+        }
+
+        /// <summary>
+        /// Fills <paramref name="offsets"/> with the clamped sampling positions for one axis.
+        /// </summary>
+        /// <param name="offsets">The destination; <paramref name="length"/> runs of <paramref name="kernelSize"/> entries.</param>
+        /// <param name="length">The extent of the bounds along the axis.</param>
+        /// <param name="kernelSize">The extent of the kernel along the axis.</param>
+        /// <param name="min">The first coordinate inside the bounds.</param>
+        /// <param name="max">The last coordinate inside the bounds.</param>
+        private static void BuildOffsets(Span<int> offsets, int length, int kernelSize, int min, int max)
+        {
+            // While this isn't a hotpath we still dip into unsafe to avoid the span bounds
+            // checks as we can potentially be looping over large arrays.
+            int radius = kernelSize >> 1;
+            ref int offsetsBase = ref MemoryMarshal.GetReference(offsets);
+            for (int i = 0; i < length; i++)
+            {
+                int runBase = i * kernelSize;
+                for (int k = 0; k < kernelSize; k++)
+                {
+                    Unsafe.Add(ref offsetsBase, runBase + k) = i + k + min - radius;
+                }
+            }
+
+            // A single-element kernel has a zero radius and cannot sample outside the bounds.
+            if (kernelSize > 1)
+            {
+                Numerics.Clamp(offsets, min, max);
+            }
+        }
+    }
+}
diff --git a/src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs b/src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs
new file mode 100644
index 0000000000..37e0060054
--- /dev/null
+++ b/src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs
@@ -0,0 +1,63 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp.Processing.Processors.Convolution
+{
+    /// <summary>
+    /// A stack only, readonly, kernel matrix that can be indexed without
+    /// bounds checks when compiled in release mode.
+    /// </summary>
+    internal readonly ref struct ReadOnlyKernel
+    {
+        private readonly ReadOnlySpan<float> values;
+
+        public ReadOnlyKernel(DenseMatrix<float> matrix)
+        {
+            // The dimensions are cached next to the span so indexing never goes back to the matrix.
+            this.Columns = matrix.Columns;
+            this.Rows = matrix.Rows;
+            this.values = matrix.Span;
+        }
+
+        public int Columns { [MethodImpl(MethodImplOptions.AggressiveInlining)] get; }
+
+        public int Rows { [MethodImpl(MethodImplOptions.AggressiveInlining)] get; }
+
+        /// <summary>
+        /// Gets the value at the given position; the coordinates are only validated in DEBUG builds.
+        /// </summary>
+        public float this[int row, int column]
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get
+            {
+                this.CheckCoordinates(row, column);
+                // Row-major lookup: every row occupies Columns consecutive values.
+                ref float vBase = ref MemoryMarshal.GetReference(this.values);
+                return Unsafe.Add(ref vBase, (row * this.Columns) + column);
+            }
+        }
+
+        /// <summary>
+        /// Throws when a coordinate lies outside the matrix; calls are compiled out unless DEBUG is defined.
+        /// </summary>
+        [Conditional("DEBUG")]
+        private void CheckCoordinates(int row, int column)
+        {
+            if (row < 0 || row >= this.Rows)
+            {
+                throw new ArgumentOutOfRangeException(nameof(row), row, $"{row} is outwith the matrix bounds.");
+            }
+
+            if (column < 0 || column >= this.Columns)
+            {
+                throw new ArgumentOutOfRangeException(nameof(column), column, $"{column} is outwith the matrix bounds.");
+            }
+        }
+    }
+}
diff --git a/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs
index 4dc9e41960..d0c8ff40d7 100644
--- a/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs
+++ b/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs
@@ -72,11 +72,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Filters
public void Invoke(int y, Span span)
{
Span rowSpan = this.source.GetPixelRowSpan(y).Slice(this.startX, span.Length);
- PixelOperations.Instance.ToVector4(this.configuration, rowSpan, span);
+ PixelOperations.Instance.ToVector4(this.configuration, rowSpan, span, PixelConversionModifiers.Scale);
ColorNumerics.Transform(span, ref Unsafe.AsRef(this.matrix));
- PixelOperations.Instance.FromVector4Destructive(this.configuration, span, rowSpan);
+ PixelOperations.Instance.FromVector4Destructive(this.configuration, span, rowSpan, PixelConversionModifiers.Scale);
}
}
}
diff --git a/tests/ImageSharp.Benchmarks/Config.cs b/tests/ImageSharp.Benchmarks/Config.cs
index 4c9f6c06db..d08e2f2d66 100644
--- a/tests/ImageSharp.Benchmarks/Config.cs
+++ b/tests/ImageSharp.Benchmarks/Config.cs
@@ -27,6 +27,14 @@ namespace SixLabors.ImageSharp.Benchmarks
}
+        /// <summary>
+        /// Runs the benchmarks on .NET Framework 4.7.2, .NET Core 2.1 and .NET Core 3.1.
+        /// </summary>
+        public class MultiFramework : Config
+        {
+            public MultiFramework()
+            {
+                Job template = Job.Default;
+                this.AddJob(
+                    template.WithRuntime(ClrRuntime.Net472),
+                    template.WithRuntime(CoreRuntime.Core21),
+                    template.WithRuntime(CoreRuntime.Core31));
+            }
+        }
+
public class ShortClr : Config
{
public ShortClr() => this.AddJob(
diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs
new file mode 100644
index 0000000000..eade8e0c43
--- /dev/null
+++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs
@@ -0,0 +1,286 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+using BenchmarkDotNet.Attributes;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion
+{
+    public unsafe class PixelConversion_PackFromRgbPlanes
+    {
+        private byte[] rBuf;
+        private byte[] gBuf;
+        private byte[] bBuf;
+        private Rgb24[] rgbBuf;
+        private Rgba32[] rgbaBuf;
+
+        private float[] rFloat;
+        private float[] gFloat;
+        private float[] bFloat;
+
+        private float[] rgbaFloat;
+
+        [Params(1024)]
+        public int Count { get; set; }
+
+        [GlobalSetup]
+        public void Setup()
+        {
+            this.rBuf = new byte[this.Count];
+            this.gBuf = new byte[this.Count];
+            this.bBuf = new byte[this.Count];
+            this.rgbBuf = new Rgb24[this.Count + 3]; // padded for the Rgb24 AVX2 packer
+            this.rgbaBuf = new Rgba32[this.Count];
+
+            this.rFloat = new float[this.Count];
+            this.gFloat = new float[this.Count];
+            this.bFloat = new float[this.Count];
+
+            this.rgbaFloat = new float[this.Count * 4];
+        }
+
+        // [Benchmark]
+        public void Rgb24_Scalar_PerElement_Pinned()
+        {
+            fixed (byte* r = &this.rBuf[0])
+            fixed (byte* g = &this.gBuf[0])
+            fixed (byte* b = &this.bBuf[0])
+            fixed (Rgb24* rgb = &this.rgbBuf[0])
+            {
+                for (int i = 0; i < this.Count; i++)
+                {
+                    Rgb24* d = rgb + i;
+                    d->R = r[i];
+                    d->G = g[i];
+                    d->B = b[i];
+                }
+            }
+        }
+
+        [Benchmark]
+        public void Rgb24_Scalar_PerElement_Span()
+        {
+            Span<byte> r = this.rBuf;
+            Span<byte> g = this.gBuf;
+            Span<byte> b = this.bBuf;
+            Span<Rgb24> rgb = this.rgbBuf;
+
+            for (int i = 0; i < r.Length; i++)
+            {
+                ref Rgb24 d = ref rgb[i];
+                d.R = r[i];
+                d.G = g[i];
+                d.B = b[i];
+            }
+        }
+
+        [Benchmark]
+        public void Rgb24_Scalar_PerElement_Unsafe()
+        {
+            ref byte r = ref this.rBuf[0];
+            ref byte g = ref this.gBuf[0];
+            ref byte b = ref this.bBuf[0];
+            ref Rgb24 rgb = ref this.rgbBuf[0];
+
+            for (int i = 0; i < this.Count; i++)
+            {
+                ref Rgb24 d = ref Unsafe.Add(ref rgb, i);
+                d.R = Unsafe.Add(ref r, i);
+                d.G = Unsafe.Add(ref g, i);
+                d.B = Unsafe.Add(ref b, i);
+            }
+        }
+
+        [Benchmark]
+        public void Rgb24_Scalar_PerElement_Batched8()
+        {
+            ref Byte8 r = ref Unsafe.As<byte, Byte8>(ref this.rBuf[0]);
+            ref Byte8 g = ref Unsafe.As<byte, Byte8>(ref this.gBuf[0]);
+            ref Byte8 b = ref Unsafe.As<byte, Byte8>(ref this.bBuf[0]);
+            ref Rgb24 rgb = ref this.rgbBuf[0];
+
+            int count = this.Count / 8;
+            for (int i = 0; i < count; i++)
+            {
+                ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 8);
+                ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1);
+                ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2);
+                ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3);
+                ref Rgb24 d4 = ref Unsafe.Add(ref d0, 4);
+                ref Rgb24 d5 = ref Unsafe.Add(ref d0, 5);
+                ref Rgb24 d6 = ref Unsafe.Add(ref d0, 6);
+                ref Rgb24 d7 = ref Unsafe.Add(ref d0, 7);
+
+                ref Byte8 rr = ref Unsafe.Add(ref r, i);
+                ref Byte8 gg = ref Unsafe.Add(ref g, i);
+                ref Byte8 bb = ref Unsafe.Add(ref b, i);
+
+                d0.R = rr.V0;
+                d0.G = gg.V0;
+                d0.B = bb.V0;
+
+                d1.R = rr.V1;
+                d1.G = gg.V1;
+                d1.B = bb.V1;
+
+                d2.R = rr.V2;
+                d2.G = gg.V2;
+                d2.B = bb.V2;
+
+                d3.R = rr.V3;
+                d3.G = gg.V3;
+                d3.B = bb.V3;
+
+                d4.R = rr.V4;
+                d4.G = gg.V4;
+                d4.B = bb.V4;
+
+                d5.R = rr.V5;
+                d5.G = gg.V5;
+                d5.B = bb.V5;
+
+                d6.R = rr.V6;
+                d6.G = gg.V6;
+                d6.B = bb.V6;
+
+                d7.R = rr.V7;
+                d7.G = gg.V7;
+                d7.B = bb.V7;
+            }
+        }
+
+        [Benchmark]
+        public void Rgb24_Scalar_PerElement_Batched4()
+        {
+            ref Byte4 r = ref Unsafe.As<byte, Byte4>(ref this.rBuf[0]);
+            ref Byte4 g = ref Unsafe.As<byte, Byte4>(ref this.gBuf[0]);
+            ref Byte4 b = ref Unsafe.As<byte, Byte4>(ref this.bBuf[0]);
+            ref Rgb24 rgb = ref this.rgbBuf[0];
+
+            int count = this.Count / 4;
+            for (int i = 0; i < count; i++)
+            {
+                ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4);
+                ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1);
+                ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2);
+                ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3);
+
+                ref Byte4 rr = ref Unsafe.Add(ref r, i);
+                ref Byte4 gg = ref Unsafe.Add(ref g, i);
+                ref Byte4 bb = ref Unsafe.Add(ref b, i);
+
+                d0.R = rr.V0;
+                d0.G = gg.V0;
+                d0.B = bb.V0;
+
+                d1.R = rr.V1;
+                d1.G = gg.V1;
+                d1.B = bb.V1;
+
+                d2.R = rr.V2;
+                d2.G = gg.V2;
+                d2.B = bb.V2;
+
+                d3.R = rr.V3;
+                d3.G = gg.V3;
+                d3.B = bb.V3;
+            }
+        }
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+        [Benchmark(Baseline = true)]
+        public void Rgba32_Avx2_Float()
+        {
+            ref Vector256<float> rBase = ref Unsafe.As<float, Vector256<float>>(ref this.rFloat[0]);
+            ref Vector256<float> gBase = ref Unsafe.As<float, Vector256<float>>(ref this.gFloat[0]);
+            ref Vector256<float> bBase = ref Unsafe.As<float, Vector256<float>>(ref this.bFloat[0]);
+            ref Vector256<float> resultBase = ref Unsafe.As<float, Vector256<float>>(ref this.rgbaFloat[0]);
+
+            int count = this.Count / Vector256<float>.Count;
+
+            ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
+            Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
+
+            var va = Vector256.Create(1F);
+
+            for (int i = 0; i < count; i++)
+            {
+                Vector256<float> r = Unsafe.Add(ref rBase, i);
+                Vector256<float> g = Unsafe.Add(ref gBase, i);
+                Vector256<float> b = Unsafe.Add(ref bBase, i);
+
+                r = Avx2.PermuteVar8x32(r, vcontrol);
+                g = Avx2.PermuteVar8x32(g, vcontrol);
+                b = Avx2.PermuteVar8x32(b, vcontrol);
+
+                Vector256<float> vte = Avx.UnpackLow(r, b);
+                Vector256<float> vto = Avx.UnpackLow(g, va);
+
+                ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
+
+                destination = Avx.UnpackLow(vte, vto);
+                Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
+
+                vte = Avx.UnpackHigh(r, b);
+                vto = Avx.UnpackHigh(g, va);
+
+                Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
+                Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
+            }
+        }
+
+        [Benchmark]
+        public void Rgb24_Avx2_Bytes()
+        {
+            ReadOnlySpan<byte> r = this.rBuf;
+            ReadOnlySpan<byte> g = this.gBuf;
+            ReadOnlySpan<byte> b = this.bBuf;
+            Span<Rgb24> rgb = this.rgbBuf;
+            SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb);
+        }
+
+        [Benchmark]
+        public void Rgba32_Avx2_Bytes()
+        {
+            ReadOnlySpan<byte> r = this.rBuf;
+            ReadOnlySpan<byte> g = this.gBuf;
+            ReadOnlySpan<byte> b = this.bBuf;
+            Span<Rgba32> rgb = this.rgbaBuf;
+            SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb);
+        }
+#endif
+
+#pragma warning disable SA1132
+        private struct Byte8
+        {
+            public byte V0, V1, V2, V3, V4, V5, V6, V7;
+        }
+
+        private struct Byte4
+        {
+            public byte V0, V1, V2, V3;
+        }
+#pragma warning restore
+
+        // Results @ Anton's PC, 2020 Dec 05
+        // .NET Core 3.1.1
+        // Intel Core i7-7700HQ CPU 2.80GHz (Kaby Lake), 1 CPU, 8 logical and 4 physical cores
+        //
+        // |                           Method | Count |       Mean |    Error |   StdDev | Ratio | RatioSD |
+        // |--------------------------------- |------ |-----------:|---------:|---------:|------:|--------:|
+        // |     Rgb24_Scalar_PerElement_Span |  1024 | 1,634.6 ns | 26.56 ns | 24.84 ns |  3.12 |    0.05 |
+        // |   Rgb24_Scalar_PerElement_Unsafe |  1024 | 1,284.7 ns |  4.70 ns |  4.16 ns |  2.46 |    0.01 |
+        // | Rgb24_Scalar_PerElement_Batched8 |  1024 | 1,182.3 ns |  5.12 ns |  4.27 ns |  2.26 |    0.01 |
+        // | Rgb24_Scalar_PerElement_Batched4 |  1024 | 1,146.2 ns | 16.38 ns | 14.52 ns |  2.19 |    0.02 |
+        // |                Rgba32_Avx2_Float |  1024 |   522.7 ns |  1.78 ns |  1.39 ns |  1.00 |    0.00 |
+        // |                 Rgb24_Avx2_Bytes |  1024 |   243.3 ns |  1.56 ns |  1.30 ns |  0.47 |    0.00 |
+        // |                Rgba32_Avx2_Bytes |  1024 |   146.0 ns |  2.48 ns |  2.32 ns |  0.28 |    0.01 |
+    }
+}
\ No newline at end of file
diff --git a/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs b/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs
index 62d5806037..8f009e58f1 100644
--- a/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs
+++ b/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs
@@ -7,7 +7,7 @@ using SixLabors.ImageSharp.Processing;
namespace SixLabors.ImageSharp.Benchmarks.Samplers
{
- [Config(typeof(Config.ShortClr))]
+ [Config(typeof(Config.MultiFramework))]
public class GaussianBlur
{
[Benchmark]
diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
index ec09e43e57..1f680aa6cc 100644
--- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
+++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
@@ -5,8 +5,10 @@ using System;
using System.Linq;
using System.Numerics;
using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-using SixLabors.ImageSharp.Common.Tuples;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics.X86;
+#endif
+using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
using Xunit.Abstractions;
@@ -169,7 +171,7 @@ namespace SixLabors.ImageSharp.Tests.Common
public static readonly TheoryData ArbitraryArraySizes =
new TheoryData
{
- 0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 520,
+ 0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520,
};
[Theory]
@@ -336,90 +338,135 @@ namespace SixLabors.ImageSharp.Tests.Common
}
}
- private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
- int count,
- Action,
- Memory> convert,
- int seed = -1)
+ [Theory]
+ [MemberData(nameof(ArbitraryArraySizes))]
+ public void PackFromRgbPlanes_Rgb24(int count)
{
- seed = seed > 0 ? seed : count;
- float[] source = new Random(seed).GenerateRandomFloatArray(count, -0.2f, 1.2f);
- byte[] expected = source.Select(NormalizedFloatToByte).ToArray();
- var actual = new byte[count];
-
- convert(source, actual);
-
- Assert.Equal(expected, actual);
+ TestPackFromRgbPlanes(
+ count,
+ (r, g, b, actual) =>
+ SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual));
}
- private static byte NormalizedFloatToByte(float f) => (byte)Math.Min(255f, Math.Max(0f, (f * 255f) + 0.5f));
-
[Theory]
- [InlineData(0)]
- [InlineData(7)]
- [InlineData(42)]
- [InlineData(255)]
- [InlineData(256)]
- [InlineData(257)]
- private void MagicConvertToByte(float value)
+ [MemberData(nameof(ArbitraryArraySizes))]
+ public void PackFromRgbPlanes_Rgba32(int count)
{
- byte actual = MagicConvert(value / 256f);
- var expected = (byte)value;
-
- Assert.Equal(expected, actual);
+ TestPackFromRgbPlanes(
+ count,
+ (r, g, b, actual) =>
+ SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual));
}
+#if SUPPORTS_RUNTIME_INTRINSICS
[Fact]
- private void BulkConvertNormalizedFloatToByte_Step()
+ public void PackFromRgbPlanesAvx2Reduce_Rgb24()
{
- if (this.SkipOnNonAvx2())
+ if (!Avx2.IsSupported)
{
return;
}
- float[] source = { 0, 7, 42, 255, 0.5f, 1.1f, 2.6f, 16f };
+ byte[] r = Enumerable.Range(0, 32).Select(x => (byte)x).ToArray();
+ byte[] g = Enumerable.Range(100, 32).Select(x => (byte)x).ToArray();
+ byte[] b = Enumerable.Range(200, 32).Select(x => (byte)x).ToArray();
+ const int padding = 4;
+ Rgb24[] d = new Rgb24[32 + padding];
- byte[] expected = source.Select(f => (byte)Math.Round(f)).ToArray();
+ ReadOnlySpan rr = r.AsSpan();
+ ReadOnlySpan gg = g.AsSpan();
+ ReadOnlySpan bb = b.AsSpan();
+ Span dd = d.AsSpan();
- source = source.Select(f => f / 255f).ToArray();
+ SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd);
- Span dest = stackalloc byte[8];
-
- this.MagicConvert(source, dest);
+ for (int i = 0; i < 32; i++)
+ {
+ Assert.Equal(i, d[i].R);
+ Assert.Equal(i + 100, d[i].G);
+ Assert.Equal(i + 200, d[i].B);
+ }
- Assert.True(dest.SequenceEqual(expected));
+ Assert.Equal(0, rr.Length);
+ Assert.Equal(0, gg.Length);
+ Assert.Equal(0, bb.Length);
+ Assert.Equal(padding, dd.Length);
}
- private static byte MagicConvert(float x)
+ [Fact]
+ public void PackFromRgbPlanesAvx2Reduce_Rgba32()
{
- float f = 32768.0f + x;
- uint i = Unsafe.As(ref f);
- return (byte)i;
- }
+ if (!Avx2.IsSupported)
+ {
+ return;
+ }
- private void MagicConvert(Span source, Span dest)
- {
- var magick = new Vector(32768.0f);
+ byte[] r = Enumerable.Range(0, 32).Select(x => (byte)x).ToArray();
+ byte[] g = Enumerable.Range(100, 32).Select(x => (byte)x).ToArray();
+ byte[] b = Enumerable.Range(200, 32).Select(x => (byte)x).ToArray();
- var scale = new Vector(255f) / new Vector(256f);
+ Rgba32[] d = new Rgba32[32];
- Vector x = MemoryMarshal.Cast>(source)[0];
+ ReadOnlySpan rr = r.AsSpan();
+ ReadOnlySpan gg = g.AsSpan();
+ ReadOnlySpan bb = b.AsSpan();
+ Span dd = d.AsSpan();
+
+ SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd);
+
+ for (int i = 0; i < 32; i++)
+ {
+ Assert.Equal(i, d[i].R);
+ Assert.Equal(i + 100, d[i].G);
+ Assert.Equal(i + 200, d[i].B);
+ Assert.Equal(255, d[i].A);
+ }
- x = (x * scale) + magick;
+ Assert.Equal(0, rr.Length);
+ Assert.Equal(0, gg.Length);
+ Assert.Equal(0, bb.Length);
+ Assert.Equal(0, dd.Length);
+ }
+#endif
+
+ internal static void TestPackFromRgbPlanes(int count, Action packMethod)
+ where TPixel : unmanaged, IPixel
+ {
+ Random rnd = new Random(42);
+ byte[] r = rnd.GenerateRandomByteArray(count);
+ byte[] g = rnd.GenerateRandomByteArray(count);
+ byte[] b = rnd.GenerateRandomByteArray(count);
+
+ TPixel[] expected = new TPixel[count];
+ for (int i = 0; i < count; i++)
+ {
+ expected[i].FromRgb24(new Rgb24(r[i], g[i], b[i]));
+ }
- Tuple8.OfUInt32 ii = default;
+ TPixel[] actual = new TPixel[count + 3]; // padding for Rgb24 AVX2
+ packMethod(r, g, b, actual);
- ref Vector iiRef = ref Unsafe.As>(ref ii);
+ Assert.True(expected.AsSpan().SequenceEqual(actual.AsSpan().Slice(0, count)));
+ }
- iiRef = x;
+ private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
+ int count,
+ Action,
+ Memory> convert,
+ int seed = -1)
+ {
+ seed = seed > 0 ? seed : count;
+ float[] source = new Random(seed).GenerateRandomFloatArray(count, -0.2f, 1.2f);
+ byte[] expected = source.Select(NormalizedFloatToByte).ToArray();
+ var actual = new byte[count];
- ref Tuple8.OfByte d = ref MemoryMarshal.Cast(dest)[0];
- d.LoadFrom(ref ii);
+ convert(source, actual);
- this.Output.WriteLine(ii.ToString());
- this.Output.WriteLine(d.ToString());
+ Assert.Equal(expected, actual);
}
+ private static byte NormalizedFloatToByte(float f) => (byte)Math.Min(255f, Math.Max(0f, (f * 255f) + 0.5f));
+
private static void AssertEvenRoundIsCorrect(Vector r, Vector v)
{
for (int i = 0; i < Vector