diff --git a/.gitattributes b/.gitattributes index c0bff6e189..7c648c0774 100644 --- a/.gitattributes +++ b/.gitattributes @@ -80,8 +80,11 @@ *.pvr binary *.snk binary *.tga binary +*.tif binary +*.tiff binary *.ttc binary *.ttf binary +*.wbmp binary *.webp binary *.woff binary *.woff2 binary diff --git a/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs b/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs deleted file mode 100644 index f265bdd517..0000000000 --- a/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs +++ /dev/null @@ -1,279 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using SixLabors.ImageSharp.Memory; -using SixLabors.ImageSharp.PixelFormats; - -namespace SixLabors.ImageSharp -{ - /// - /// Extension methods for . - /// TODO: One day rewrite all this to use SIMD intrinsics. There's a lot of scope for improvement. - /// - internal static class DenseMatrixUtils - { - /// - /// Computes the sum of vectors in the span referenced by weighted by the two kernel weight values. - /// Using this method the convolution filter is not applied to alpha in addition to the color channels. - /// - /// The pixel format. - /// The vertical dense matrix. - /// The horizontal dense matrix. - /// The source frame. - /// The target row base reference. - /// The current row. - /// The current column. - /// The minimum working area row. - /// The maximum working area row. - /// The minimum working area column. - /// The maximum working area column. 
- [MethodImpl(InliningOptions.ShortMethod)] - public static void Convolve2D3( - in DenseMatrix matrixY, - in DenseMatrix matrixX, - Buffer2D sourcePixels, - ref Vector4 targetRowRef, - int row, - int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn) - where TPixel : unmanaged, IPixel - { - Convolve2DImpl( - in matrixY, - in matrixX, - sourcePixels, - row, - column, - minRow, - maxRow, - minColumn, - maxColumn, - out Vector4 vector); - - ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); - vector.W = target.W; - - Numerics.UnPremultiply(ref vector); - target = vector; - } - - /// - /// Computes the sum of vectors in the span referenced by weighted by the two kernel weight values. - /// Using this method the convolution filter is applied to alpha in addition to the color channels. - /// - /// The pixel format. - /// The vertical dense matrix. - /// The horizontal dense matrix. - /// The source frame. - /// The target row base reference. - /// The current row. - /// The current column. - /// The minimum working area row. - /// The maximum working area row. - /// The minimum working area column. - /// The maximum working area column. 
- [MethodImpl(InliningOptions.ShortMethod)] - public static void Convolve2D4( - in DenseMatrix matrixY, - in DenseMatrix matrixX, - Buffer2D sourcePixels, - ref Vector4 targetRowRef, - int row, - int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn) - where TPixel : unmanaged, IPixel - { - Convolve2DImpl( - in matrixY, - in matrixX, - sourcePixels, - row, - column, - minRow, - maxRow, - minColumn, - maxColumn, - out Vector4 vector); - - ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); - Numerics.UnPremultiply(ref vector); - target = vector; - } - - [MethodImpl(InliningOptions.ShortMethod)] - public static void Convolve2DImpl( - in DenseMatrix matrixY, - in DenseMatrix matrixX, - Buffer2D sourcePixels, - int row, - int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn, - out Vector4 vector) - where TPixel : unmanaged, IPixel - { - Vector4 vectorY = default; - Vector4 vectorX = default; - int matrixHeight = matrixY.Rows; - int matrixWidth = matrixY.Columns; - int radiusY = matrixHeight >> 1; - int radiusX = matrixWidth >> 1; - int sourceOffsetColumnBase = column + minColumn; - - for (int y = 0; y < matrixHeight; y++) - { - int offsetY = Numerics.Clamp(row + y - radiusY, minRow, maxRow); - Span sourceRowSpan = sourcePixels.GetRowSpan(offsetY); - - for (int x = 0; x < matrixWidth; x++) - { - int offsetX = Numerics.Clamp(sourceOffsetColumnBase + x - radiusX, minColumn, maxColumn); - var currentColor = sourceRowSpan[offsetX].ToVector4(); - Numerics.Premultiply(ref currentColor); - - vectorX += matrixX[y, x] * currentColor; - vectorY += matrixY[y, x] * currentColor; - } - } - - vector = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY)); - } - - /// - /// Computes the sum of vectors in the span referenced by weighted by the kernel weight values. - /// Using this method the convolution filter is not applied to alpha in addition to the color channels. - /// - /// The pixel format. - /// The dense matrix. 
- /// The source frame. - /// The target row base reference. - /// The current row. - /// The current column. - /// The minimum working area row. - /// The maximum working area row. - /// The minimum working area column. - /// The maximum working area column. - [MethodImpl(InliningOptions.ShortMethod)] - public static void Convolve3( - in DenseMatrix matrix, - Buffer2D sourcePixels, - ref Vector4 targetRowRef, - int row, - int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn) - where TPixel : unmanaged, IPixel - { - Vector4 vector = default; - - ConvolveImpl( - in matrix, - sourcePixels, - row, - column, - minRow, - maxRow, - minColumn, - maxColumn, - ref vector); - - ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); - vector.W = target.W; - - Numerics.UnPremultiply(ref vector); - target = vector; - } - - /// - /// Computes the sum of vectors in the span referenced by weighted by the kernel weight values. - /// Using this method the convolution filter is applied to alpha in addition to the color channels. - /// - /// The pixel format. - /// The dense matrix. - /// The source frame. - /// The target row base reference. - /// The current row. - /// The current column. - /// The minimum working area row. - /// The maximum working area row. - /// The minimum working area column. - /// The maximum working area column. 
- [MethodImpl(InliningOptions.ShortMethod)] - public static void Convolve4( - in DenseMatrix matrix, - Buffer2D sourcePixels, - ref Vector4 targetRowRef, - int row, - int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn) - where TPixel : unmanaged, IPixel - { - Vector4 vector = default; - - ConvolveImpl( - in matrix, - sourcePixels, - row, - column, - minRow, - maxRow, - minColumn, - maxColumn, - ref vector); - - ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column); - Numerics.UnPremultiply(ref vector); - target = vector; - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void ConvolveImpl( - in DenseMatrix matrix, - Buffer2D sourcePixels, - int row, - int column, - int minRow, - int maxRow, - int minColumn, - int maxColumn, - ref Vector4 vector) - where TPixel : unmanaged, IPixel - { - int matrixHeight = matrix.Rows; - int matrixWidth = matrix.Columns; - int radiusY = matrixHeight >> 1; - int radiusX = matrixWidth >> 1; - int sourceOffsetColumnBase = column + minColumn; - - for (int y = 0; y < matrixHeight; y++) - { - int offsetY = Numerics.Clamp(row + y - radiusY, minRow, maxRow); - Span sourceRowSpan = sourcePixels.GetRowSpan(offsetY); - - for (int x = 0; x < matrixWidth; x++) - { - int offsetX = Numerics.Clamp(sourceOffsetColumnBase + x - radiusX, minColumn, maxColumn); - var currentColor = sourceRowSpan[offsetX].ToVector4(); - Numerics.Premultiply(ref currentColor); - vector += matrix[y, x] * currentColor; - } - } - } - } -} diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index b760301167..475d64bc4f 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -7,6 +7,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; +using SixLabors.ImageSharp.PixelFormats; 
namespace SixLabors.ImageSharp { @@ -22,6 +23,20 @@ namespace SixLabors.ImageSharp private static ReadOnlySpan ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 }; + private static ReadOnlySpan ShuffleMaskShiftAlpha => + new byte[] + { + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15, + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15 + }; + + public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => + new byte[] + { + 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, + 5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 + }; + /// /// Shuffle single-precision (32-bit) floating-point elements in /// using the control and store the results in . @@ -789,6 +804,138 @@ namespace SixLabors.ImageSharp } } } + + internal static void PackFromRgbPlanesAvx2Reduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span destination) + { + ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel)); + ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel)); + ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); + ref byte dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + + int count = redChannel.Length / Vector256.Count; + + ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 control1 = Unsafe.As>(ref control1Bytes); + + ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32); + Vector256 control2 = Unsafe.As>(ref control2Bytes); + + Vector256 a = Vector256.Create((byte)255); + + Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); + + for (int i = 0; i < count; i++) + { + Vector256 r0 = Unsafe.Add(ref rBase, i); + Vector256 g0 = Unsafe.Add(ref gBase, i); + Vector256 b0 = Unsafe.Add(ref bBase, i); + + r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), 
control1).AsByte(); + g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte(); + b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte(); + + Vector256 rg = Avx2.UnpackLow(r0, g0); + Vector256 b1 = Avx2.UnpackLow(b0, a); + + Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rg = Avx2.UnpackHigh(r0, g0); + b1 = Avx2.UnpackHigh(b0, a); + + Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha); + rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha); + rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha); + rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha); + + rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte(); + rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte(); + rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte(); + rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte(); + + ref byte d1 = ref Unsafe.Add(ref dBase, 24 * 4 * i); + ref byte d2 = ref Unsafe.Add(ref d1, 24); + ref byte d3 = ref Unsafe.Add(ref d2, 24); + ref byte d4 = ref Unsafe.Add(ref d3, 24); + + Unsafe.As>(ref d1) = rgb1; + Unsafe.As>(ref d2) = rgb2; + Unsafe.As>(ref d3) = rgb3; + Unsafe.As>(ref d4) = rgb4; + } + + int slice = count * Vector256.Count; + redChannel = redChannel.Slice(slice); + greenChannel = greenChannel.Slice(slice); + blueChannel = blueChannel.Slice(slice); + destination = destination.Slice(slice); + } + + internal static void PackFromRgbPlanesAvx2Reduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span destination) + { + ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel)); + ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel)); + ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); + ref 
Vector256 dBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + + int count = redChannel.Length / Vector256.Count; + + ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 control1 = Unsafe.As>(ref control1Bytes); + + ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32); + Vector256 control2 = Unsafe.As>(ref control2Bytes); + + Vector256 a = Vector256.Create((byte)255); + + Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); + + for (int i = 0; i < count; i++) + { + Vector256 r0 = Unsafe.Add(ref rBase, i); + Vector256 g0 = Unsafe.Add(ref gBase, i); + Vector256 b0 = Unsafe.Add(ref bBase, i); + + r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte(); + g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte(); + b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte(); + + Vector256 rg = Avx2.UnpackLow(r0, g0); + Vector256 b1 = Avx2.UnpackLow(b0, a); + + Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rg = Avx2.UnpackHigh(r0, g0); + b1 = Avx2.UnpackHigh(b0, a); + + Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + ref Vector256 d0 = ref Unsafe.Add(ref dBase, i * 4); + d0 = rgb1; + Unsafe.Add(ref d0, 1) = rgb2; + Unsafe.Add(ref d0, 2) = rgb3; + Unsafe.Add(ref d0, 3) = rgb4; + } + + int slice = count * Vector256.Count; + redChannel = redChannel.Slice(slice); + greenChannel = greenChannel.Slice(slice); + blueChannel = blueChannel.Slice(slice); + destination = destination.Slice(slice); + } } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs new file mode 100644 index 0000000000..fe02bd0072 --- /dev/null +++ 
b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs @@ -0,0 +1,206 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.PixelFormats; + +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif + +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + [MethodImpl(InliningOptions.ShortMethod)] + internal static void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(destination.Length > redChannel.Length + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); + +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) + { + HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + else +#endif + { + PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + + PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); + } + + [MethodImpl(InliningOptions.ShortMethod)] + internal static void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(destination.Length >= 
redChannel.Length, nameof(destination), "'destination' span should not be shorter than the source channels!"); + +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) + { + HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + else +#endif + { + PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + + PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); + } + + private static void PackFromRgbPlanesScalarBatchedReduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span destination) + { + ref ByteTuple4 r = ref Unsafe.As(ref MemoryMarshal.GetReference(redChannel)); + ref ByteTuple4 g = ref Unsafe.As(ref MemoryMarshal.GetReference(greenChannel)); + ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel)); + ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination); + + int count = redChannel.Length / 4; + for (int i = 0; i < count; i++) + { + ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); + ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); + + ref ByteTuple4 rr = ref Unsafe.Add(ref r, i); + ref ByteTuple4 gg = ref Unsafe.Add(ref g, i); + ref ByteTuple4 bb = ref Unsafe.Add(ref b, i); + + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; + + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; + + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; + + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; + } + + int finished = count * 4; + redChannel = redChannel.Slice(finished); + greenChannel = greenChannel.Slice(finished); + blueChannel = blueChannel.Slice(finished); + destination = destination.Slice(finished); + } + + private static void PackFromRgbPlanesScalarBatchedReduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span 
destination) + { + ref ByteTuple4 r = ref Unsafe.As(ref MemoryMarshal.GetReference(redChannel)); + ref ByteTuple4 g = ref Unsafe.As(ref MemoryMarshal.GetReference(greenChannel)); + ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel)); + ref Rgba32 rgb = ref MemoryMarshal.GetReference(destination); + + int count = redChannel.Length / 4; + destination.Fill(new Rgba32(0, 0, 0, 255)); + for (int i = 0; i < count; i++) + { + ref Rgba32 d0 = ref Unsafe.Add(ref rgb, i * 4); + ref Rgba32 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgba32 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgba32 d3 = ref Unsafe.Add(ref d0, 3); + + ref ByteTuple4 rr = ref Unsafe.Add(ref r, i); + ref ByteTuple4 gg = ref Unsafe.Add(ref g, i); + ref ByteTuple4 bb = ref Unsafe.Add(ref b, i); + + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; + + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; + + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; + + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; + } + + int finished = count * 4; + redChannel = redChannel.Slice(finished); + greenChannel = greenChannel.Slice(finished); + blueChannel = blueChannel.Slice(finished); + destination = destination.Slice(finished); + } + + private static void PackFromRgbPlanesRemainder( + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + ref byte r = ref MemoryMarshal.GetReference(redChannel); + ref byte g = ref MemoryMarshal.GetReference(greenChannel); + ref byte b = ref MemoryMarshal.GetReference(blueChannel); + ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination); + + for (int i = 0; i < redChannel.Length; i++) // bound by the source length: 'destination' may carry trailing padding and Unsafe.Add is unchecked + { + ref Rgb24 d = ref Unsafe.Add(ref rgb, i); + d.R = Unsafe.Add(ref r, i); + d.G = Unsafe.Add(ref g, i); + d.B = Unsafe.Add(ref b, i); + } + } + + private static void PackFromRgbPlanesRemainder( + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + ref byte r = ref 
MemoryMarshal.GetReference(redChannel); + ref byte g = ref MemoryMarshal.GetReference(greenChannel); + ref byte b = ref MemoryMarshal.GetReference(blueChannel); + ref Rgba32 rgba = ref MemoryMarshal.GetReference(destination); + + for (int i = 0; i < redChannel.Length; i++) // bound by the source length: 'destination' may be longer and Unsafe.Add is unchecked + { + ref Rgba32 d = ref Unsafe.Add(ref rgba, i); + d.R = Unsafe.Add(ref r, i); + d.G = Unsafe.Add(ref g, i); + d.B = Unsafe.Add(ref b, i); + d.A = 255; + } + } + } +} \ No newline at end of file diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index aaf6d405cf..6d82cfad01 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -6,6 +6,7 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using SixLabors.ImageSharp.PixelFormats; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -220,5 +221,13 @@ namespace SixLabors.ImageSharp nameof(source), $"length should be divisible by {shouldBeDivisibleBy}!"); } + + private struct ByteTuple4 + { + public byte V0; + public byte V1; + public byte V2; + public byte V3; + } } } diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj index 1d7fb2958b..a90aaf715a 100644 --- a/src/ImageSharp/ImageSharp.csproj +++ b/src/ImageSharp/ImageSharp.csproj @@ -24,16 +24,16 @@ - + - + - + - - + + diff --git a/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs b/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs index a7a51f77dd..0c35c88286 100644 --- a/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs +++ b/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs @@ -53,8 +53,13 @@ namespace SixLabors.ImageSharp.Memory { ThrowObjectDisposedException(); } - +#if SUPPORTS_CREATESPAN + ref byte r0 = ref MemoryMarshal.GetReference(this.Data); + 
return MemoryMarshal.CreateSpan(ref Unsafe.As(ref r0), this.length); +#else return MemoryMarshal.Cast(this.Data.AsSpan()).Slice(0, this.length); +#endif + } /// diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs index 73b656f363..f345f58bcd 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs @@ -21,6 +21,23 @@ namespace SixLabors.ImageSharp.PixelFormats /// public override PixelTypeInfo GetPixelTypeInfo() => LazyInfo.Value; + + /// + internal override void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + Guard.NotNull(configuration, nameof(configuration)); + int count = redChannel.Length; + Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!"); + Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!"); + Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); + + SimdUtils.PackFromRgbPlanes(configuration, redChannel, greenChannel, blueChannel, destination); + } } } } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs index d8322e37d4..9633059774 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs @@ -56,6 +56,23 @@ namespace SixLabors.ImageSharp.PixelFormats MemoryMarshal.Cast(sourceVectors), MemoryMarshal.Cast(destinationPixels)); 
} + + /// + internal override void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + Guard.NotNull(configuration, nameof(configuration)); + int count = redChannel.Length; + Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!"); + Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!"); + Guard.IsTrue(destination.Length >= count, nameof(destination), "'destination' span should not be shorter than the source channels!"); + + SimdUtils.PackFromRgbPlanes(configuration, redChannel, greenChannel, blueChannel, destination); + } } } } diff --git a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs index dbe06702d9..c5450538e4 100644 --- a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs +++ b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs @@ -4,6 +4,8 @@ using System; using System.Buffers; using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Memory; @@ -159,5 +161,45 @@ namespace SixLabors.ImageSharp.PixelFormats PixelOperations.Instance.From(configuration, sourcePixels, destinationPixels); } + + /// + /// Bulk operation that packs 3 separate RGB channels to . + /// The destination must have a padding of 3. + /// + /// A to configure internal operations. + /// A to the red values. + /// A to the green values. + /// A to the blue values. + /// A to the destination pixels. 
+ internal virtual void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + Guard.NotNull(configuration, nameof(configuration)); + + int count = redChannel.Length; + Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!"); + Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!"); + Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); + + Guard.DestinationShouldNotBeTooShort(redChannel, destination, nameof(destination)); + + Rgb24 rgb24 = default; + ref byte r = ref MemoryMarshal.GetReference(redChannel); + ref byte g = ref MemoryMarshal.GetReference(greenChannel); + ref byte b = ref MemoryMarshal.GetReference(blueChannel); + ref TPixel d = ref MemoryMarshal.GetReference(destination); + + for (int i = 0; i < count; i++) + { + rgb24.R = Unsafe.Add(ref r, i); + rgb24.G = Unsafe.Add(ref g, i); + rgb24.B = Unsafe.Add(ref b, i); + Unsafe.Add(ref d, i).FromRgb24(rgb24); + } + } } } diff --git a/src/ImageSharp/Primitives/DenseMatrix{T}.cs b/src/ImageSharp/Primitives/DenseMatrix{T}.cs index e312703368..60dadb617b 100644 --- a/src/ImageSharp/Primitives/DenseMatrix{T}.cs +++ b/src/ImageSharp/Primitives/DenseMatrix{T}.cs @@ -109,7 +109,7 @@ namespace SixLabors.ImageSharp /// The at the specified position. public ref T this[int row, int column] { - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] get { this.CheckCoordinates(row, column); @@ -124,7 +124,7 @@ namespace SixLabors.ImageSharp /// /// The representation on the source data. 
/// - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static implicit operator DenseMatrix(T[,] data) => new DenseMatrix(data); /// @@ -134,7 +134,7 @@ namespace SixLabors.ImageSharp /// /// The representation on the source data. /// - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] #pragma warning disable SA1008 // Opening parenthesis should be spaced correctly public static implicit operator T[,](in DenseMatrix data) #pragma warning restore SA1008 // Opening parenthesis should be spaced correctly @@ -175,7 +175,7 @@ namespace SixLabors.ImageSharp /// Transposes the rows and columns of the dense matrix. /// /// The . - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public DenseMatrix Transpose() { var result = new DenseMatrix(this.Rows, this.Columns); @@ -196,13 +196,13 @@ namespace SixLabors.ImageSharp /// Fills the matrix with the given value /// /// The value to fill each item with - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Fill(T value) => this.Span.Fill(value); /// /// Clears the matrix setting each value to the default value for the element type /// - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Clear() => this.Span.Clear(); /// @@ -232,14 +232,14 @@ namespace SixLabors.ImageSharp => obj is DenseMatrix other && this.Equals(other); /// - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool Equals(DenseMatrix other) => this.Columns == other.Columns && this.Rows == other.Rows && this.Span.SequenceEqual(other.Span); /// - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public override int GetHashCode() { HashCode code = default; diff --git 
a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs index 3a5f35cd14..bb559019b7 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs @@ -1,10 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -using System; using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -43,12 +40,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution } /// - /// Gets the horizontal gradient operator. + /// Gets the horizontal convolution kernel. /// public DenseMatrix KernelX { get; } /// - /// Gets the vertical gradient operator. + /// Gets the vertical convolution kernel. /// public DenseMatrix KernelY { get; } @@ -60,102 +57,39 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution /// protected override void OnFrameApply(ImageFrame source) { - using Buffer2D targetPixels = this.Configuration.MemoryAllocator.Allocate2D(source.Width, source.Height); + MemoryAllocator allocator = this.Configuration.MemoryAllocator; + using Buffer2D targetPixels = allocator.Allocate2D(source.Width, source.Height); source.CopyTo(targetPixels); var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); - var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, this.KernelY, this.KernelX, this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in operation); + // We use a rectangle 3x the interest width to allocate a buffer big enough + // for source and target bulk pixel conversion. 
+ var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 3, interest.Height); - Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels); - } - - /// - /// A implementing the convolution logic for . - /// - private readonly struct RowOperation : IRowOperation - { - private readonly Rectangle bounds; - private readonly int maxY; - private readonly int maxX; - private readonly Buffer2D targetPixels; - private readonly Buffer2D sourcePixels; - private readonly DenseMatrix kernelY; - private readonly DenseMatrix kernelX; - private readonly Configuration configuration; - private readonly bool preserveAlpha; - - [MethodImpl(InliningOptions.ShortMethod)] - public RowOperation( - Rectangle bounds, - Buffer2D targetPixels, - Buffer2D sourcePixels, - DenseMatrix kernelY, - DenseMatrix kernelX, - Configuration configuration, - bool preserveAlpha) - { - this.bounds = bounds; - this.maxY = this.bounds.Bottom - 1; - this.maxX = this.bounds.Right - 1; - this.targetPixels = targetPixels; - this.sourcePixels = sourcePixels; - this.kernelY = kernelY; - this.kernelX = kernelX; - this.configuration = configuration; - this.preserveAlpha = preserveAlpha; - } - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public void Invoke(int y, Span span) + using (var map = new KernelSamplingMap(allocator)) { - ref Vector4 spanRef = ref MemoryMarshal.GetReference(span); - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); - PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); + // Since the kernel sizes are identical we can use a single map. 
+ map.BuildSamplingOffsetMap(this.KernelY, interest); - if (this.preserveAlpha) - { - for (int x = 0; x < this.bounds.Width; x++) - { - DenseMatrixUtils.Convolve2D3( - in this.kernelY, - in this.kernelX, - this.sourcePixels, - ref spanRef, - y, - x, - this.bounds.Y, - this.maxY, - this.bounds.X, - this.maxX); - } - } - else - { - for (int x = 0; x < this.bounds.Width; x++) - { - DenseMatrixUtils.Convolve2D4( - in this.kernelY, - in this.kernelX, - this.sourcePixels, - ref spanRef, - y, - x, - this.bounds.Y, - this.maxY, - this.bounds.X, - this.maxX); - } - } + var operation = new Convolution2DRowOperation( + interest, + targetPixels, + source.PixelBuffer, + map, + this.KernelY, + this.KernelX, + this.Configuration, + this.PreserveAlpha); - PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan); + ParallelRowIterator.IterateRows, Vector4>( + this.Configuration, + operationBounds, + in operation); } + + Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs new file mode 100644 index 0000000000..802d1809f2 --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs @@ -0,0 +1,193 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A implementing the logic for 2D convolution. 
+ /// + internal readonly struct Convolution2DRowOperation : IRowOperation + where TPixel : unmanaged, IPixel + { + private readonly Rectangle bounds; + private readonly Buffer2D targetPixels; + private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; + private readonly DenseMatrix kernelMatrixY; + private readonly DenseMatrix kernelMatrixX; + private readonly Configuration configuration; + private readonly bool preserveAlpha; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Convolution2DRowOperation( + Rectangle bounds, + Buffer2D targetPixels, + Buffer2D sourcePixels, + KernelSamplingMap map, + DenseMatrix kernelMatrixY, + DenseMatrix kernelMatrixX, + Configuration configuration, + bool preserveAlpha) + { + this.bounds = bounds; + this.targetPixels = targetPixels; + this.sourcePixels = sourcePixels; + this.map = map; + this.kernelMatrixY = kernelMatrixY; + this.kernelMatrixX = kernelMatrixX; + this.configuration = configuration; + this.preserveAlpha = preserveAlpha; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Invoke(int y, Span span) + { + if (this.preserveAlpha) + { + this.Convolve3(y, span); + } + else + { + this.Convolve4(y, span); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve3(int y, Span span) + { + // Span is 3x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, boundsWidth); + Span targetYBuffer = span.Slice(boundsWidth, boundsWidth); + Span targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth); + + var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + // Clear the target buffers for each row run. 
+ targetYBuffer.Clear(); + targetXBuffer.Clear(); + ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer); + ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer); + + ReadOnlyKernel kernelY = state.KernelY; + ReadOnlyKernel kernelX = state.KernelX; + Span sourceRow; + for (int kY = 0; kY < kernelY.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int sampleY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 targetY = ref Unsafe.Add(ref targetBaseY, x); + ref Vector4 targetX = ref Unsafe.Add(ref targetBaseX, x); + + for (int kX = 0; kX < kernelY.Columns; kX++) + { + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + targetY += kernelX[kY, kX] * sample; + targetX += kernelY[kY, kX] * sample; + } + } + } + + // Now we need to combine the values and copy the original alpha values + // from the source row. 
+ sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBaseY, x); + Vector4 vectorY = target; + Vector4 vectorX = Unsafe.Add(ref targetBaseX, x); + + target = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY)); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; + } + + Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRowSpan); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve4(int y, Span span) + { + // Span is 3x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, boundsWidth); + Span targetYBuffer = span.Slice(boundsWidth, boundsWidth); + Span targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth); + + var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + // Clear the target buffers for each row run. + targetYBuffer.Clear(); + targetXBuffer.Clear(); + ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer); + ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer); + + ReadOnlyKernel kernelY = state.KernelY; + ReadOnlyKernel kernelX = state.KernelX; + for (int kY = 0; kY < kernelY.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. 
+ int sampleY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 targetY = ref Unsafe.Add(ref targetBaseY, x); + ref Vector4 targetX = ref Unsafe.Add(ref targetBaseX, x); + + for (int kX = 0; kX < kernelY.Columns; kX++) + { + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + targetY += kernelX[kY, kX] * sample; + targetX += kernelY[kY, kX] * sample; + } + } + } + + // Now we need to combine the values + for (int x = 0; x < targetYBuffer.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBaseY, x); + Vector4 vectorY = target; + Vector4 vectorX = Unsafe.Add(ref targetBaseX, x); + + target = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY)); + } + + Numerics.UnPremultiply(targetYBuffer); + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRow); + } + } +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs new file mode 100644 index 0000000000..218093ac4e --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs @@ -0,0 +1,54 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A stack only struct used for reducing reference indirection during 2D convolution operations. + /// + internal readonly ref struct Convolution2DState + { + private readonly Span rowOffsetMap; + private readonly Span columnOffsetMap; + private readonly int kernelHeight; + private readonly int kernelWidth; + + public Convolution2DState( + in DenseMatrix kernelY, + in DenseMatrix kernelX, + KernelSamplingMap map) + { + // We check the kernels are the same size upstream. + this.KernelY = new ReadOnlyKernel(kernelY); + this.KernelX = new ReadOnlyKernel(kernelX); + this.kernelHeight = kernelY.Rows; + this.kernelWidth = kernelY.Columns; + this.rowOffsetMap = map.GetRowOffsetSpan(); + this.columnOffsetMap = map.GetColumnOffsetSpan(); + } + + public readonly ReadOnlyKernel KernelY + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + } + + public readonly ReadOnlyKernel KernelX + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public readonly ref int GetSampleRow(int row) + => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), row * this.kernelHeight); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public readonly ref int GetSampleColumn(int column) + => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), column * this.kernelWidth); + } +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index b61690415a..151b0ffccc 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -42,12 +42,12 @@ namespace 
SixLabors.ImageSharp.Processing.Processors.Convolution } /// - /// Gets the horizontal gradient operator. + /// Gets the horizontal convolution kernel. /// public DenseMatrix KernelX { get; } /// - /// Gets the vertical gradient operator. + /// Gets the vertical convolution kernel. /// public DenseMatrix KernelY { get; } @@ -63,96 +63,48 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); - // Horizontal convolution - var horizontalOperation = new RowOperation(interest, firstPassPixels, source.PixelBuffer, this.KernelX, this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in horizontalOperation); + // We use a rectangle 2x the interest width to allocate a buffer big enough + // for source and target bulk pixel conversion. + var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height); - // Vertical convolution - var verticalOperation = new RowOperation(interest, source.PixelBuffer, firstPassPixels, this.KernelY, this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in verticalOperation); - } - - /// - /// A implementing the convolution logic for . 
- /// - private readonly struct RowOperation : IRowOperation - { - private readonly Rectangle bounds; - private readonly Buffer2D targetPixels; - private readonly Buffer2D sourcePixels; - private readonly DenseMatrix kernel; - private readonly Configuration configuration; - private readonly bool preserveAlpha; - - [MethodImpl(InliningOptions.ShortMethod)] - public RowOperation( - Rectangle bounds, - Buffer2D targetPixels, - Buffer2D sourcePixels, - DenseMatrix kernel, - Configuration configuration, - bool preserveAlpha) + using (var mapX = new KernelSamplingMap(this.Configuration.MemoryAllocator)) { - this.bounds = bounds; - this.targetPixels = targetPixels; - this.sourcePixels = sourcePixels; - this.kernel = kernel; - this.configuration = configuration; - this.preserveAlpha = preserveAlpha; + mapX.BuildSamplingOffsetMap(this.KernelX, interest); + + // Horizontal convolution + var horizontalOperation = new ConvolutionRowOperation( + interest, + firstPassPixels, + source.PixelBuffer, + mapX, + this.KernelX, + this.Configuration, + this.PreserveAlpha); + + ParallelRowIterator.IterateRows, Vector4>( + this.Configuration, + operationBounds, + in horizontalOperation); } - /// - [MethodImpl(InliningOptions.ShortMethod)] - public void Invoke(int y, Span span) + using (var mapY = new KernelSamplingMap(this.Configuration.MemoryAllocator)) { - ref Vector4 spanRef = ref MemoryMarshal.GetReference(span); - - int maxY = this.bounds.Bottom - 1; - int maxX = this.bounds.Right - 1; - - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); - PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); - - if (this.preserveAlpha) - { - for (int x = 0; x < this.bounds.Width; x++) - { - DenseMatrixUtils.Convolve3( - in this.kernel, - this.sourcePixels, - ref spanRef, - y, - x, - this.bounds.Y, - maxY, - this.bounds.X, - maxX); - } - } - else - { - for (int x = 0; x < this.bounds.Width; x++) - { - DenseMatrixUtils.Convolve4( - 
in this.kernel, - this.sourcePixels, - ref spanRef, - y, - x, - this.bounds.Y, - maxY, - this.bounds.X, - maxX); - } - } - - PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan); + mapY.BuildSamplingOffsetMap(this.KernelY, interest); + + // Vertical convolution + var verticalOperation = new ConvolutionRowOperation( + interest, + source.PixelBuffer, + firstPassPixels, + mapY, + this.KernelY, + this.Configuration, + this.PreserveAlpha); + + ParallelRowIterator.IterateRows, Vector4>( + this.Configuration, + operationBounds, + in verticalOperation); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs index 95fef15f62..924a1125bd 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs @@ -39,7 +39,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution } /// - /// Gets the 2d gradient operator. + /// Gets the 2d convolution kernel. 
/// public DenseMatrix KernelXY { get; } @@ -51,16 +51,26 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution /// protected override void OnFrameApply(ImageFrame source) { - using Buffer2D targetPixels = this.Configuration.MemoryAllocator.Allocate2D(source.Size()); + MemoryAllocator allocator = this.Configuration.MemoryAllocator; + using Buffer2D targetPixels = allocator.Allocate2D(source.Size()); source.CopyTo(targetPixels); var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds()); - var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, this.KernelXY, this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows( - this.Configuration, - interest, - in operation); + + // We use a rectangle 2x the interest width to allocate a buffer big enough + // for source and target bulk pixel conversion. + var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height); + using (var map = new KernelSamplingMap(allocator)) + { + map.BuildSamplingOffsetMap(this.KernelXY, interest); + + var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, map, this.KernelXY, this.Configuration, this.PreserveAlpha); + ParallelRowIterator.IterateRows( + this.Configuration, + operationBounds, + in operation); + } Buffer2D.SwapOrCopyContent(source.PixelBuffer, targetPixels); } @@ -71,10 +81,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution private readonly struct RowOperation : IRowOperation { private readonly Rectangle bounds; - private readonly int maxY; - private readonly int maxX; private readonly Buffer2D targetPixels; private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; private readonly DenseMatrix kernel; private readonly Configuration configuration; private readonly bool preserveAlpha; @@ -84,15 +93,15 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Rectangle bounds, Buffer2D targetPixels, Buffer2D 
sourcePixels, + KernelSamplingMap map, DenseMatrix kernel, Configuration configuration, bool preserveAlpha) { this.bounds = bounds; - this.maxY = this.bounds.Bottom - 1; - this.maxX = this.bounds.Right - 1; this.targetPixels = targetPixels; this.sourcePixels = sourcePixels; + this.map = map; this.kernel = kernel; this.configuration = configuration; this.preserveAlpha = preserveAlpha; @@ -102,45 +111,93 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(int y, Span span) { - ref Vector4 spanRef = ref MemoryMarshal.GetReference(span); + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span); + Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); - Span targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X); - PixelOperations.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span); + var state = new ConvolutionState(in this.kernel, this.map); + int row = y - this.bounds.Y; + ref int sampleRowBase = ref state.GetSampleRow(row); if (this.preserveAlpha) { - for (int x = 0; x < this.bounds.Width; x++) + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + Span sourceRow; + for (int kY = 0; kY < state.Kernel.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. 
+ int offsetY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < state.Kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += state.Kernel[kY, kX] * sample; + } + } + } + + // Now we need to copy the original alpha values from the source row. + sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) { - DenseMatrixUtils.Convolve3( - in this.kernel, - this.sourcePixels, - ref spanRef, - y, - x, - this.bounds.Y, - this.maxY, - this.bounds.X, - this.maxX); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; } } else { - for (int x = 0; x < this.bounds.Width; x++) + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + for (int kY = 0; kY < state.Kernel.Rows; kY++) { - DenseMatrixUtils.Convolve4( - in this.kernel, - this.sourcePixels, - ref spanRef, - y, - x, - this.bounds.Y, - this.maxY, - this.bounds.X, - this.maxX); + // Get the precalculated source sample row for this kernel row and copy to our buffer. 
+ int offsetY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < state.Kernel.Columns; kX++) + { + int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, offsetX); + target += state.Kernel[kY, kX] * sample; + } + } } + + Numerics.UnPremultiply(targetBuffer); } - PixelOperations.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRowSpan); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs new file mode 100644 index 0000000000..9876b2885b --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs @@ -0,0 +1,163 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A implementing the logic for 1D convolution. 
+ /// + internal readonly struct ConvolutionRowOperation : IRowOperation + where TPixel : unmanaged, IPixel + { + private readonly Rectangle bounds; + private readonly Buffer2D targetPixels; + private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; + private readonly DenseMatrix kernelMatrix; + private readonly Configuration configuration; + private readonly bool preserveAlpha; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ConvolutionRowOperation( + Rectangle bounds, + Buffer2D targetPixels, + Buffer2D sourcePixels, + KernelSamplingMap map, + DenseMatrix kernelMatrix, + Configuration configuration, + bool preserveAlpha) + { + this.bounds = bounds; + this.targetPixels = targetPixels; + this.sourcePixels = sourcePixels; + this.map = map; + this.kernelMatrix = kernelMatrix; + this.configuration = configuration; + this.preserveAlpha = preserveAlpha; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Invoke(int y, Span span) + { + if (this.preserveAlpha) + { + this.Convolve3(y, span); + } + else + { + this.Convolve4(y, span); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve3(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + ReadOnlyKernel kernel = state.Kernel; + Span sourceRow; + for (int kY = 0; kY < kernel.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. 
+ int sampleY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + target += kernel[kY, kX] * sample; + } + } + } + + // Now we need to copy the original alpha values from the source row. + sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; + } + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve4(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + // Clear the target buffer for each row run. 
+ targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + ReadOnlyKernel kernel = state.Kernel; + for (int kY = 0; kY < kernel.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int sampleY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + target += kernel[kY, kX] * sample; + } + } + } + + Numerics.UnPremultiply(targetBuffer); + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); + } + } +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs new file mode 100644 index 0000000000..3f296c67df --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs @@ -0,0 +1,45 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A stack only struct used for reducing reference indirection during convolution operations. 
+ /// + internal readonly ref struct ConvolutionState + { + private readonly Span rowOffsetMap; + private readonly Span columnOffsetMap; + private readonly int kernelHeight; + private readonly int kernelWidth; + + public ConvolutionState( + in DenseMatrix kernel, + KernelSamplingMap map) + { + this.Kernel = new ReadOnlyKernel(kernel); + this.kernelHeight = kernel.Rows; + this.kernelWidth = kernel.Columns; + this.rowOffsetMap = map.GetRowOffsetSpan(); + this.columnOffsetMap = map.GetColumnOffsetSpan(); + } + + public readonly ReadOnlyKernel Kernel + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public readonly ref int GetSampleRow(int row) + => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.rowOffsetMap), row * this.kernelHeight); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public readonly ref int GetSampleColumn(int column) + => ref Unsafe.Add(ref MemoryMarshal.GetReference(this.columnOffsetMap), column * this.kernelWidth); + } +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs b/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs new file mode 100644 index 0000000000..e4b7dbea09 --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs @@ -0,0 +1,102 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Buffers; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Memory; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// Provides a map of the convolution kernel sampling offsets. + /// + internal sealed class KernelSamplingMap : IDisposable + { + private readonly MemoryAllocator allocator; + private bool isDisposed; + private IMemoryOwner yOffsets; + private IMemoryOwner xOffsets; + + /// + /// Initializes a new instance of the class. 
+ /// + /// The memory allocator. + public KernelSamplingMap(MemoryAllocator allocator) => this.allocator = allocator; + + /// + /// Builds a map of the sampling offsets for the kernel clamped by the given bounds. + /// + /// The convolution kernel. + /// The source bounds. + public void BuildSamplingOffsetMap(DenseMatrix kernel, Rectangle bounds) + { + int kernelHeight = kernel.Rows; + int kernelWidth = kernel.Columns; + this.yOffsets = this.allocator.Allocate(bounds.Height * kernelHeight); + this.xOffsets = this.allocator.Allocate(bounds.Width * kernelWidth); + + int minY = bounds.Y; + int maxY = bounds.Bottom - 1; + int minX = bounds.X; + int maxX = bounds.Right - 1; + + int radiusY = kernelHeight >> 1; + int radiusX = kernelWidth >> 1; + + // Calculate the y and x sampling offsets clamped to the given rectangle. + // While this isn't a hotpath we still dip into unsafe to avoid the span bounds + // checks as the can potentially be looping over large arrays. + Span ySpan = this.yOffsets.GetSpan(); + ref int ySpanBase = ref MemoryMarshal.GetReference(ySpan); + for (int row = 0; row < bounds.Height; row++) + { + int rowBase = row * kernelHeight; + for (int y = 0; y < kernelHeight; y++) + { + Unsafe.Add(ref ySpanBase, rowBase + y) = row + y + minY - radiusY; + } + } + + if (kernelHeight > 1) + { + Numerics.Clamp(ySpan, minY, maxY); + } + + Span xSpan = this.xOffsets.GetSpan(); + ref int xSpanBase = ref MemoryMarshal.GetReference(xSpan); + for (int column = 0; column < bounds.Width; column++) + { + int columnBase = column * kernelWidth; + for (int x = 0; x < kernelWidth; x++) + { + Unsafe.Add(ref xSpanBase, columnBase + x) = column + x + minX - radiusX; + } + } + + if (kernelWidth > 1) + { + Numerics.Clamp(xSpan, minX, maxX); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Span GetRowOffsetSpan() => this.yOffsets.GetSpan(); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Span GetColumnOffsetSpan() => this.xOffsets.GetSpan(); + 
+ /// + public void Dispose() + { + if (!this.isDisposed) + { + this.yOffsets.Dispose(); + this.xOffsets.Dispose(); + + this.isDisposed = true; + } + } + } +} diff --git a/src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs b/src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs new file mode 100644 index 0000000000..37e0060054 --- /dev/null +++ b/src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs @@ -0,0 +1,63 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp.Processing.Processors.Convolution +{ + /// + /// A stack only, readonly, kernel matrix that can be indexed without + /// bounds checks when compiled in release mode. + /// + internal readonly ref struct ReadOnlyKernel + { + private readonly ReadOnlySpan values; + + public ReadOnlyKernel(DenseMatrix matrix) + { + this.Columns = matrix.Columns; + this.Rows = matrix.Rows; + this.values = matrix.Span; + } + + public int Columns + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + } + + public int Rows + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + } + + public float this[int row, int column] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get + { + this.CheckCoordinates(row, column); + ref float vBase = ref MemoryMarshal.GetReference(this.values); + return Unsafe.Add(ref vBase, (row * this.Columns) + column); + } + } + + [Conditional("DEBUG")] + private void CheckCoordinates(int row, int column) + { + if (row < 0 || row >= this.Rows) + { + throw new ArgumentOutOfRangeException(nameof(row), row, $"{row} is outwith the matrix bounds."); + } + + if (column < 0 || column >= this.Columns) + { + throw new ArgumentOutOfRangeException(nameof(column), column, $"{column} is outwith the matrix bounds."); + } + } + } +} diff --git 
a/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs index 4dc9e41960..d0c8ff40d7 100644 --- a/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs @@ -72,11 +72,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Filters public void Invoke(int y, Span span) { Span rowSpan = this.source.GetPixelRowSpan(y).Slice(this.startX, span.Length); - PixelOperations.Instance.ToVector4(this.configuration, rowSpan, span); + PixelOperations.Instance.ToVector4(this.configuration, rowSpan, span, PixelConversionModifiers.Scale); ColorNumerics.Transform(span, ref Unsafe.AsRef(this.matrix)); - PixelOperations.Instance.FromVector4Destructive(this.configuration, span, rowSpan); + PixelOperations.Instance.FromVector4Destructive(this.configuration, span, rowSpan, PixelConversionModifiers.Scale); } } } diff --git a/tests/ImageSharp.Benchmarks/Config.cs b/tests/ImageSharp.Benchmarks/Config.cs index 4c9f6c06db..d08e2f2d66 100644 --- a/tests/ImageSharp.Benchmarks/Config.cs +++ b/tests/ImageSharp.Benchmarks/Config.cs @@ -27,6 +27,14 @@ namespace SixLabors.ImageSharp.Benchmarks } + public class MultiFramework : Config + { + public MultiFramework() => this.AddJob( + Job.Default.WithRuntime(ClrRuntime.Net472), + Job.Default.WithRuntime(CoreRuntime.Core21), + Job.Default.WithRuntime(CoreRuntime.Core31)); + } + public class ShortClr : Config { public ShortClr() => this.AddJob( diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs new file mode 100644 index 0000000000..eade8e0c43 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs @@ -0,0 +1,286 @@ +// Copyright (c) Six Labors. 
+// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion +{ + public unsafe class PixelConversion_PackFromRgbPlanes + { + private byte[] rBuf; + private byte[] gBuf; + private byte[] bBuf; + private Rgb24[] rgbBuf; + private Rgba32[] rgbaBuf; + + private float[] rFloat; + private float[] gFloat; + private float[] bFloat; + + private float[] rgbaFloat; + + [Params(1024)] + public int Count { get; set; } + + [GlobalSetup] + public void Setup() + { + this.rBuf = new byte[this.Count]; + this.gBuf = new byte[this.Count]; + this.bBuf = new byte[this.Count]; + this.rgbBuf = new Rgb24[this.Count + 3]; // padded + this.rgbaBuf = new Rgba32[this.Count]; + + this.rFloat = new float[this.Count]; + this.gFloat = new float[this.Count]; + this.bFloat = new float[this.Count]; + + this.rgbaFloat = new float[this.Count * 4]; + } + + // [Benchmark] + public void Rgb24_Scalar_PerElement_Pinned() + { + fixed (byte* r = &this.rBuf[0]) + fixed (byte* g = &this.gBuf[0]) + fixed (byte* b = &this.bBuf[0]) + fixed (Rgb24* rgb = &this.rgbBuf[0]) + { + for (int i = 0; i < this.Count; i++) + { + Rgb24* d = rgb + i; + d->R = r[i]; + d->G = g[i]; + d->B = b[i]; + } + } + } + + [Benchmark] + public void Rgb24_Scalar_PerElement_Span() + { + Span r = this.rBuf; + Span g = this.rBuf; + Span b = this.rBuf; + Span rgb = this.rgbBuf; + + for (int i = 0; i < r.Length; i++) + { + ref Rgb24 d = ref rgb[i]; + d.R = r[i]; + d.G = g[i]; + d.B = b[i]; + } + } + + [Benchmark] + public void Rgb24_Scalar_PerElement_Unsafe() + { + ref byte r = ref this.rBuf[0]; + ref byte g = ref this.rBuf[0]; + ref byte b = ref this.rBuf[0]; + ref Rgb24 rgb = ref this.rgbBuf[0]; + + for 
(int i = 0; i < this.Count; i++) + { + ref Rgb24 d = ref Unsafe.Add(ref rgb, i); + d.R = Unsafe.Add(ref r, i); + d.G = Unsafe.Add(ref g, i); + d.B = Unsafe.Add(ref b, i); + } + } + + [Benchmark] + public void Rgb24_Scalar_PerElement_Batched8() + { + ref Byte8 r = ref Unsafe.As(ref this.rBuf[0]); + ref Byte8 g = ref Unsafe.As(ref this.rBuf[0]); + ref Byte8 b = ref Unsafe.As(ref this.rBuf[0]); + ref Rgb24 rgb = ref this.rgbBuf[0]; + + int count = this.Count / 8; + for (int i = 0; i < count; i++) + { + ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 8); + ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); + ref Rgb24 d4 = ref Unsafe.Add(ref d0, 4); + ref Rgb24 d5 = ref Unsafe.Add(ref d0, 5); + ref Rgb24 d6 = ref Unsafe.Add(ref d0, 6); + ref Rgb24 d7 = ref Unsafe.Add(ref d0, 7); + + ref Byte8 rr = ref Unsafe.Add(ref r, i); + ref Byte8 gg = ref Unsafe.Add(ref g, i); + ref Byte8 bb = ref Unsafe.Add(ref b, i); + + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; + + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; + + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; + + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; + + d4.R = rr.V4; + d4.G = gg.V4; + d4.B = bb.V4; + + d5.R = rr.V5; + d5.G = gg.V5; + d5.B = bb.V5; + + d6.R = rr.V6; + d6.G = gg.V6; + d6.B = bb.V6; + + d7.R = rr.V7; + d7.G = gg.V7; + d7.B = bb.V7; + } + } + + [Benchmark] + public void Rgb24_Scalar_PerElement_Batched4() + { + ref Byte4 r = ref Unsafe.As(ref this.rBuf[0]); + ref Byte4 g = ref Unsafe.As(ref this.rBuf[0]); + ref Byte4 b = ref Unsafe.As(ref this.rBuf[0]); + ref Rgb24 rgb = ref this.rgbBuf[0]; + + int count = this.Count / 4; + for (int i = 0; i < count; i++) + { + ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); + ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); + + ref Byte4 rr = ref Unsafe.Add(ref r, i); + ref Byte4 gg = ref Unsafe.Add(ref g, i); + ref 
Byte4 bb = ref Unsafe.Add(ref b, i); + + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; + + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; + + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; + + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + [Benchmark(Baseline = true)] + public void Rgba32_Avx2_Float() + { + ref Vector256 rBase = ref Unsafe.As>(ref this.rFloat[0]); + ref Vector256 gBase = ref Unsafe.As>(ref this.gFloat[0]); + ref Vector256 bBase = ref Unsafe.As>(ref this.bFloat[0]); + ref Vector256 resultBase = ref Unsafe.As>(ref this.rgbaFloat[0]); + + int count = this.Count / Vector256.Count; + + ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + var va = Vector256.Create(1F); + + for (int i = 0; i < count; i++) + { + Vector256 r = Unsafe.Add(ref rBase, i); + Vector256 g = Unsafe.Add(ref gBase, i); + Vector256 b = Unsafe.Add(ref bBase, i); + + r = Avx2.PermuteVar8x32(r, vcontrol); + g = Avx2.PermuteVar8x32(g, vcontrol); + b = Avx2.PermuteVar8x32(b, vcontrol); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } + } + + [Benchmark] + public void Rgb24_Avx2_Bytes() + { + ReadOnlySpan r = this.rBuf; + ReadOnlySpan g = this.rBuf; + ReadOnlySpan b = this.rBuf; + Span rgb = this.rgbBuf; + SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb); + } + + [Benchmark] + public void Rgba32_Avx2_Bytes() + { + ReadOnlySpan r = this.rBuf; + ReadOnlySpan g = this.rBuf; + ReadOnlySpan b = this.rBuf; + Span rgb = this.rgbaBuf; + 
SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb); + } +#endif + +#pragma warning disable SA1132 + private struct Byte8 + { + public byte V0, V1, V2, V3, V4, V5, V6, V7; + } + + private struct Byte4 + { + public byte V0, V1, V2, V3; + } +#pragma warning restore + + // Results @ Anton's PC, 2020 Dec 05 + // .NET Core 3.1.1 + // Intel Core i7-7700HQ CPU 2.80GHz (Kaby Lake), 1 CPU, 8 logical and 4 physical cores + // + // | Method | Count | Mean | Error | StdDev | Ratio | RatioSD | + // |--------------------------------- |------ |-----------:|---------:|---------:|------:|--------:| + // | Rgb24_Scalar_PerElement_Span | 1024 | 1,634.6 ns | 26.56 ns | 24.84 ns | 3.12 | 0.05 | + // | Rgb24_Scalar_PerElement_Unsafe | 1024 | 1,284.7 ns | 4.70 ns | 4.16 ns | 2.46 | 0.01 | + // | Rgb24_Scalar_PerElement_Batched8 | 1024 | 1,182.3 ns | 5.12 ns | 4.27 ns | 2.26 | 0.01 | + // | Rgb24_Scalar_PerElement_Batched4 | 1024 | 1,146.2 ns | 16.38 ns | 14.52 ns | 2.19 | 0.02 | + // | Rgba32_Avx2_Float | 1024 | 522.7 ns | 1.78 ns | 1.39 ns | 1.00 | 0.00 | + // | Rgb24_Avx2_Bytes | 1024 | 243.3 ns | 1.56 ns | 1.30 ns | 0.47 | 0.00 | + // | Rgba32_Avx2_Bytes | 1024 | 146.0 ns | 2.48 ns | 2.32 ns | 0.28 | 0.01 | + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs b/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs index 62d5806037..8f009e58f1 100644 --- a/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs +++ b/tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs @@ -7,7 +7,7 @@ using SixLabors.ImageSharp.Processing; namespace SixLabors.ImageSharp.Benchmarks.Samplers { - [Config(typeof(Config.ShortClr))] + [Config(typeof(Config.MultiFramework))] public class GaussianBlur { [Benchmark] diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index ec09e43e57..1f680aa6cc 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ 
b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -5,8 +5,10 @@ using System; using System.Linq; using System.Numerics; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Common.Tuples; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif +using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -169,7 +171,7 @@ namespace SixLabors.ImageSharp.Tests.Common public static readonly TheoryData ArbitraryArraySizes = new TheoryData { - 0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 520, + 0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, }; [Theory] @@ -336,90 +338,135 @@ namespace SixLabors.ImageSharp.Tests.Common } } - private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( - int count, - Action, - Memory> convert, - int seed = -1) + [Theory] + [MemberData(nameof(ArbitraryArraySizes))] + public void PackFromRgbPlanes_Rgb24(int count) { - seed = seed > 0 ? 
seed : count; - float[] source = new Random(seed).GenerateRandomFloatArray(count, -0.2f, 1.2f); - byte[] expected = source.Select(NormalizedFloatToByte).ToArray(); - var actual = new byte[count]; - - convert(source, actual); - - Assert.Equal(expected, actual); + TestPackFromRgbPlanes( + count, + (r, g, b, actual) => + SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual)); } - private static byte NormalizedFloatToByte(float f) => (byte)Math.Min(255f, Math.Max(0f, (f * 255f) + 0.5f)); - [Theory] - [InlineData(0)] - [InlineData(7)] - [InlineData(42)] - [InlineData(255)] - [InlineData(256)] - [InlineData(257)] - private void MagicConvertToByte(float value) + [MemberData(nameof(ArbitraryArraySizes))] + public void PackFromRgbPlanes_Rgba32(int count) { - byte actual = MagicConvert(value / 256f); - var expected = (byte)value; - - Assert.Equal(expected, actual); + TestPackFromRgbPlanes( + count, + (r, g, b, actual) => + SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual)); } +#if SUPPORTS_RUNTIME_INTRINSICS [Fact] - private void BulkConvertNormalizedFloatToByte_Step() + public void PackFromRgbPlanesAvx2Reduce_Rgb24() { - if (this.SkipOnNonAvx2()) + if (!Avx2.IsSupported) { return; } - float[] source = { 0, 7, 42, 255, 0.5f, 1.1f, 2.6f, 16f }; + byte[] r = Enumerable.Range(0, 32).Select(x => (byte)x).ToArray(); + byte[] g = Enumerable.Range(100, 32).Select(x => (byte)x).ToArray(); + byte[] b = Enumerable.Range(200, 32).Select(x => (byte)x).ToArray(); + const int padding = 4; + Rgb24[] d = new Rgb24[32 + padding]; - byte[] expected = source.Select(f => (byte)Math.Round(f)).ToArray(); + ReadOnlySpan rr = r.AsSpan(); + ReadOnlySpan gg = g.AsSpan(); + ReadOnlySpan bb = b.AsSpan(); + Span dd = d.AsSpan(); - source = source.Select(f => f / 255f).ToArray(); + SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd); - Span dest = stackalloc byte[8]; - - this.MagicConvert(source, dest); + for (int i = 0; i < 32; i++) + { + 
Assert.Equal(i, d[i].R); + Assert.Equal(i + 100, d[i].G); + Assert.Equal(i + 200, d[i].B); + } - Assert.True(dest.SequenceEqual(expected)); + Assert.Equal(0, rr.Length); + Assert.Equal(0, gg.Length); + Assert.Equal(0, bb.Length); + Assert.Equal(padding, dd.Length); } - private static byte MagicConvert(float x) + [Fact] + public void PackFromRgbPlanesAvx2Reduce_Rgba32() { - float f = 32768.0f + x; - uint i = Unsafe.As(ref f); - return (byte)i; - } + if (!Avx2.IsSupported) + { + return; + } - private void MagicConvert(Span source, Span dest) - { - var magick = new Vector(32768.0f); + byte[] r = Enumerable.Range(0, 32).Select(x => (byte)x).ToArray(); + byte[] g = Enumerable.Range(100, 32).Select(x => (byte)x).ToArray(); + byte[] b = Enumerable.Range(200, 32).Select(x => (byte)x).ToArray(); - var scale = new Vector(255f) / new Vector(256f); + Rgba32[] d = new Rgba32[32]; - Vector x = MemoryMarshal.Cast>(source)[0]; + ReadOnlySpan rr = r.AsSpan(); + ReadOnlySpan gg = g.AsSpan(); + ReadOnlySpan bb = b.AsSpan(); + Span dd = d.AsSpan(); + + SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd); + + for (int i = 0; i < 32; i++) + { + Assert.Equal(i, d[i].R); + Assert.Equal(i + 100, d[i].G); + Assert.Equal(i + 200, d[i].B); + Assert.Equal(255, d[i].A); + } - x = (x * scale) + magick; + Assert.Equal(0, rr.Length); + Assert.Equal(0, gg.Length); + Assert.Equal(0, bb.Length); + Assert.Equal(0, dd.Length); + } +#endif + + internal static void TestPackFromRgbPlanes(int count, Action packMethod) + where TPixel : unmanaged, IPixel + { + Random rnd = new Random(42); + byte[] r = rnd.GenerateRandomByteArray(count); + byte[] g = rnd.GenerateRandomByteArray(count); + byte[] b = rnd.GenerateRandomByteArray(count); + + TPixel[] expected = new TPixel[count]; + for (int i = 0; i < count; i++) + { + expected[i].FromRgb24(new Rgb24(r[i], g[i], b[i])); + } - Tuple8.OfUInt32 ii = default; + TPixel[] actual = new TPixel[count + 3]; // padding for Rgb24 AVX2 + 
packMethod(r, g, b, actual); - ref Vector iiRef = ref Unsafe.As>(ref ii); + Assert.True(expected.AsSpan().SequenceEqual(actual.AsSpan().Slice(0, count))); + } - iiRef = x; + private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( + int count, + Action, + Memory> convert, + int seed = -1) + { + seed = seed > 0 ? seed : count; + float[] source = new Random(seed).GenerateRandomFloatArray(count, -0.2f, 1.2f); + byte[] expected = source.Select(NormalizedFloatToByte).ToArray(); + var actual = new byte[count]; - ref Tuple8.OfByte d = ref MemoryMarshal.Cast(dest)[0]; - d.LoadFrom(ref ii); + convert(source, actual); - this.Output.WriteLine(ii.ToString()); - this.Output.WriteLine(d.ToString()); + Assert.Equal(expected, actual); } + private static byte NormalizedFloatToByte(float f) => (byte)Math.Min(255f, Math.Max(0f, (f * 255f) + 0.5f)); + private static void AssertEvenRoundIsCorrect(Vector r, Vector v) { for (int i = 0; i < Vector.Count; i++) diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelOperations/PixelOperationsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelOperations/PixelOperationsTests.cs index 8d74ccec40..39786a2177 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelOperations/PixelOperationsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelOperations/PixelOperationsTests.cs @@ -10,6 +10,7 @@ using System.Runtime.InteropServices; using SixLabors.ImageSharp.ColorSpaces.Companding; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Tests.Common; using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -1002,6 +1003,19 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelOperations (s, d) => this.Operations.ToRgba64Bytes(this.Configuration, s, d.GetSpan(), count)); } + [Theory] + [MemberData(nameof(ArraySizesData))] + public void PackFromRgbPlanes(int count) + { + SimdUtilsTests.TestPackFromRgbPlanes( + count, + ( + r, + g, + b, + actual) 
=> PixelOperations.Instance.PackFromRgbPlanes(this.Configuration, r, g, b, actual)); + } + public delegate void RefAction(ref T1 arg1); internal static Vector4[] CreateExpectedVector4Data(TPixel[] source, RefAction vectorModifier = null) @@ -1102,10 +1116,10 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelOperations return result; } - internal static byte[] CreateByteTestData(int length) + internal static byte[] CreateByteTestData(int length, int seed = 42) { byte[] result = new byte[length]; - var rnd = new Random(42); // Deterministic random values + var rnd = new Random(seed); // Deterministic random values for (int i = 0; i < result.Length; i++) { diff --git a/tests/ImageSharp.Tests/Processing/Filters/BrightnessTest.cs b/tests/ImageSharp.Tests/Processing/Filters/BrightnessTest.cs index 75a9072c59..680a6afdce 100644 --- a/tests/ImageSharp.Tests/Processing/Filters/BrightnessTest.cs +++ b/tests/ImageSharp.Tests/Processing/Filters/BrightnessTest.cs @@ -1,6 +1,7 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. 
+using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; using SixLabors.ImageSharp.Processing.Processors.Filters; using Xunit; @@ -26,5 +27,33 @@ namespace SixLabors.ImageSharp.Tests.Processing.Effects Assert.Equal(1.5F, processor.Amount); } + + [Fact] + public void Brightness_scaled_vector() + { + var rgbImage = new Image(Configuration.Default, 100, 100, new Rgb24(0, 0, 0)); + + rgbImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2))); + + Assert.Equal(new Rgb24(0, 0, 0), rgbImage[0, 0]); + + rgbImage = new Image(Configuration.Default, 100, 100, new Rgb24(10, 10, 10)); + + rgbImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2))); + + Assert.Equal(new Rgb24(20, 20, 20), rgbImage[0, 0]); + + var halfSingleImage = new Image(Configuration.Default, 100, 100, new HalfSingle(-1)); + + halfSingleImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2))); + + Assert.Equal(new HalfSingle(-1), halfSingleImage[0, 0]); + + halfSingleImage = new Image(Configuration.Default, 100, 100, new HalfSingle(-0.5f)); + + halfSingleImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2))); + + Assert.Equal(new HalfSingle(0), halfSingleImage[0, 0]); + } } -} \ No newline at end of file +}