Browse Source

Merge branch 'master' into sp/image-wrap-ptr

js/color-alpha-handling
Sergio Pedri 5 years ago
committed by GitHub
parent
commit
cad38c5156
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 3
      .gitattributes
  2. 279
      src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs
  3. 147
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  4. 206
      src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
  5. 9
      src/ImageSharp/Common/Helpers/SimdUtils.cs
  6. 10
      src/ImageSharp/ImageSharp.csproj
  7. 7
      src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs
  8. 17
      src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs
  9. 17
      src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs
  10. 42
      src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs
  11. 16
      src/ImageSharp/Primitives/DenseMatrix{T}.cs
  12. 116
      src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs
  13. 193
      src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs
  14. 54
      src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs
  15. 126
      src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs
  16. 131
      src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs
  17. 163
      src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs
  18. 45
      src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs
  19. 102
      src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs
  20. 63
      src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs
  21. 4
      src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs
  22. 8
      tests/ImageSharp.Benchmarks/Config.cs
  23. 286
      tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs
  24. 2
      tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs
  25. 159
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
  26. 18
      tests/ImageSharp.Tests/PixelFormats/PixelOperations/PixelOperationsTests.cs
  27. 33
      tests/ImageSharp.Tests/Processing/Filters/BrightnessTest.cs

3
.gitattributes

@ -80,8 +80,11 @@
*.pvr binary
*.snk binary
*.tga binary
*.tif binary
*.tiff binary
*.ttc binary
*.ttf binary
*.wbmp binary
*.webp binary
*.woff binary
*.woff2 binary

279
src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs

@ -1,279 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
{
/// <summary>
/// Extension methods for <see cref="DenseMatrix{T}"/>.
/// TODO: One day rewrite all this to use SIMD intrinsics. There's a lot of scope for improvement.
/// </summary>
internal static class DenseMatrixUtils
{
/// <summary>
/// Convolves the source neighbourhood of the given position with both kernels and stores the
/// combined result in the entry of the target row addressed by <paramref name="column"/>.
/// The alpha value already held by that target entry is kept, so only the color channels come from the convolution.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrixY">The vertical dense matrix.</param>
/// <param name="matrixX">The horizontal dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="targetRowRef">The target row base reference.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column; also the index of the written entry in the target row.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Convolve2D3<TPixel>(
    in DenseMatrix<float> matrixY,
    in DenseMatrix<float> matrixX,
    Buffer2D<TPixel> sourcePixels,
    ref Vector4 targetRowRef,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn)
    where TPixel : unmanaged, IPixel<TPixel>
{
    Convolve2DImpl(in matrixY, in matrixX, sourcePixels, row, column, minRow, maxRow, minColumn, maxColumn, out Vector4 convolved);

    ref Vector4 destination = ref Unsafe.Add(ref targetRowRef, column);

    // Carry over the existing alpha before un-premultiplying the convolved color.
    convolved.W = destination.W;
    Numerics.UnPremultiply(ref convolved);
    destination = convolved;
}
/// <summary>
/// Convolves the source neighbourhood of the given position with both kernels and stores the
/// combined result in the entry of the target row addressed by <paramref name="column"/>.
/// All four components, alpha included, are taken from the convolution result.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrixY">The vertical dense matrix.</param>
/// <param name="matrixX">The horizontal dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="targetRowRef">The target row base reference.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column; also the index of the written entry in the target row.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Convolve2D4<TPixel>(
    in DenseMatrix<float> matrixY,
    in DenseMatrix<float> matrixX,
    Buffer2D<TPixel> sourcePixels,
    ref Vector4 targetRowRef,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn)
    where TPixel : unmanaged, IPixel<TPixel>
{
    Convolve2DImpl(in matrixY, in matrixX, sourcePixels, row, column, minRow, maxRow, minColumn, maxColumn, out Vector4 convolved);

    // Alpha is part of the convolution here, so the result is stored as-is after un-premultiplying.
    Numerics.UnPremultiply(ref convolved);
    Unsafe.Add(ref targetRowRef, column) = convolved;
}
/// <summary>
/// Sums the premultiplied source pixels surrounding the given position, weighted once by each kernel,
/// and combines the two sums component-wise as the square root of the sum of their squares.
/// Sample coordinates that fall outside the working area are clamped to its edges.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrixY">The vertical dense matrix; its dimensions define the sampled neighbourhood for both kernels.</param>
/// <param name="matrixX">The horizontal dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column; <paramref name="minColumn"/> is added to it to locate the source column.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
/// <param name="vector">Receives the combined (still premultiplied) result.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Convolve2DImpl<TPixel>(
    in DenseMatrix<float> matrixY,
    in DenseMatrix<float> matrixX,
    Buffer2D<TPixel> sourcePixels,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn,
    out Vector4 vector)
    where TPixel : unmanaged, IPixel<TPixel>
{
    Vector4 sumY = default;
    Vector4 sumX = default;

    int kernelRows = matrixY.Rows;
    int kernelColumns = matrixY.Columns;
    int halfRows = kernelRows >> 1;
    int halfColumns = kernelColumns >> 1;
    int columnBase = column + minColumn;

    for (int ky = 0; ky < kernelRows; ky++)
    {
        int sampleRow = Numerics.Clamp(row + ky - halfRows, minRow, maxRow);
        Span<TPixel> pixels = sourcePixels.GetRowSpan(sampleRow);

        for (int kx = 0; kx < kernelColumns; kx++)
        {
            int sampleColumn = Numerics.Clamp(columnBase + kx - halfColumns, minColumn, maxColumn);

            Vector4 sample = pixels[sampleColumn].ToVector4();
            Numerics.Premultiply(ref sample);

            sumX += matrixX[ky, kx] * sample;
            sumY += matrixY[ky, kx] * sample;
        }
    }

    vector = Vector4.SquareRoot((sumX * sumX) + (sumY * sumY));
}
/// <summary>
/// Convolves the source neighbourhood of the given position with the kernel and stores the
/// result in the entry of the target row addressed by <paramref name="column"/>.
/// The alpha value already held by that target entry is kept, so only the color channels come from the convolution.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrix">The dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="targetRowRef">The target row base reference.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column; also the index of the written entry in the target row.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Convolve3<TPixel>(
    in DenseMatrix<float> matrix,
    Buffer2D<TPixel> sourcePixels,
    ref Vector4 targetRowRef,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn)
    where TPixel : unmanaged, IPixel<TPixel>
{
    Vector4 convolved = default;
    ConvolveImpl(in matrix, sourcePixels, row, column, minRow, maxRow, minColumn, maxColumn, ref convolved);

    ref Vector4 destination = ref Unsafe.Add(ref targetRowRef, column);

    // Carry over the existing alpha before un-premultiplying the convolved color.
    convolved.W = destination.W;
    Numerics.UnPremultiply(ref convolved);
    destination = convolved;
}
/// <summary>
/// Convolves the source neighbourhood of the given position with the kernel and stores the
/// result in the entry of the target row addressed by <paramref name="column"/>.
/// All four components, alpha included, are taken from the convolution result.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrix">The dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="targetRowRef">The target row base reference.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column; also the index of the written entry in the target row.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Convolve4<TPixel>(
    in DenseMatrix<float> matrix,
    Buffer2D<TPixel> sourcePixels,
    ref Vector4 targetRowRef,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn)
    where TPixel : unmanaged, IPixel<TPixel>
{
    Vector4 convolved = default;
    ConvolveImpl(in matrix, sourcePixels, row, column, minRow, maxRow, minColumn, maxColumn, ref convolved);

    // Alpha is part of the convolution here, so the result is stored as-is after un-premultiplying.
    Numerics.UnPremultiply(ref convolved);
    Unsafe.Add(ref targetRowRef, column) = convolved;
}
/// <summary>
/// Adds to <paramref name="vector"/> the premultiplied source pixels surrounding the given position,
/// each weighted by the matching kernel entry. Sample coordinates that fall outside the working area
/// are clamped to its edges.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="matrix">The dense matrix.</param>
/// <param name="sourcePixels">The source frame.</param>
/// <param name="row">The current row.</param>
/// <param name="column">The current column; <paramref name="minColumn"/> is added to it to locate the source column.</param>
/// <param name="minRow">The minimum working area row.</param>
/// <param name="maxRow">The maximum working area row.</param>
/// <param name="minColumn">The minimum working area column.</param>
/// <param name="maxColumn">The maximum working area column.</param>
/// <param name="vector">The accumulator the weighted samples are added to.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void ConvolveImpl<TPixel>(
    in DenseMatrix<float> matrix,
    Buffer2D<TPixel> sourcePixels,
    int row,
    int column,
    int minRow,
    int maxRow,
    int minColumn,
    int maxColumn,
    ref Vector4 vector)
    where TPixel : unmanaged, IPixel<TPixel>
{
    int kernelRows = matrix.Rows;
    int kernelColumns = matrix.Columns;
    int halfRows = kernelRows >> 1;
    int halfColumns = kernelColumns >> 1;
    int columnBase = column + minColumn;

    for (int ky = 0; ky < kernelRows; ky++)
    {
        int sampleRow = Numerics.Clamp(row + ky - halfRows, minRow, maxRow);
        Span<TPixel> pixels = sourcePixels.GetRowSpan(sampleRow);

        for (int kx = 0; kx < kernelColumns; kx++)
        {
            int sampleColumn = Numerics.Clamp(columnBase + kx - halfColumns, minColumn, maxColumn);

            Vector4 sample = pixels[sampleColumn].ToVector4();
            Numerics.Premultiply(ref sample);

            // Accumulate straight into the caller's vector, in kernel order.
            vector += matrix[ky, kx] * sample;
        }
    }
}
}
}

147
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -7,6 +7,7 @@ using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
{
@ -22,6 +23,20 @@ namespace SixLabors.ImageSharp
// 16-entry byte shuffle mask: selects bytes 0-2, 4-6, 8-10 and 12-14 (dropping every fourth byte)
// and ends with four 0x80 entries. NOTE(review): 0x80 has the high bit set, which byte-shuffle
// instructions treat as "write zero"; the consumer of this mask is not visible here - confirm.
private static ReadOnlySpan<byte> ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 };
/// <summary>
/// Byte shuffle mask covering a 256-bit vector as two identical 16-entry halves.
/// Within each half it keeps bytes 0-2, 4-6, 8-10 and 12-14 in order and moves
/// bytes 3, 7, 11 and 15 (every fourth byte) to the end.
/// </summary>
private static ReadOnlySpan<byte> ShuffleMaskShiftAlpha =>
new byte[]
{
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15,
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15
};
/// <summary>
/// Permutation control holding eight 32-bit indices written out as little-endian bytes:
/// 0, 1, 2, 4, 5, 6, 3, 7. Used as a 32-bit element permutation, it gathers elements
/// 0-2 and 4-6 at the front and places elements 3 and 7 last.
/// </summary>
public static ReadOnlySpan<byte> PermuteMaskShiftAlpha8x32 =>
new byte[]
{
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0,
5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0
};
/// <summary>
/// Shuffle single-precision (32-bit) floating-point elements in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
@ -789,6 +804,138 @@ namespace SixLabors.ImageSharp
}
}
}
/// <summary>
/// Interleaves as many whole 32-byte groups of the three planar channels as are available into
/// <paramref name="destination"/> using AVX2, then slices all four spans past the processed elements
/// so that the caller can handle whatever is left.
/// </summary>
/// <param name="redChannel">The red values; sliced to the unprocessed remainder on return.</param>
/// <param name="greenChannel">The green values; sliced to the unprocessed remainder on return.</param>
/// <param name="blueChannel">The blue values; sliced to the unprocessed remainder on return.</param>
/// <param name="destination">The destination pixels; sliced to the unprocessed remainder on return.</param>
/// <remarks>
/// Every iteration issues four 32-byte stores spaced 24 bytes apart while the destination advances by
/// 96 bytes per iteration, so the fourth store reaches 8 bytes beyond the bytes that iteration produces.
/// The destination therefore has to extend at least that far past the processed pixels.
/// </remarks>
internal static void PackFromRgbPlanesAvx2Reduce(
ref ReadOnlySpan<byte> redChannel,
ref ReadOnlySpan<byte> greenChannel,
ref ReadOnlySpan<byte> blueChannel,
ref Span<Rgb24> destination)
{
// View the channel data as 256-bit vectors and the destination as raw bytes.
ref Vector256<byte> rBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(redChannel));
ref Vector256<byte> gBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(greenChannel));
ref Vector256<byte> bBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(blueChannel));
ref byte dBase = ref Unsafe.As<Rgb24, byte>(ref MemoryMarshal.GetReference(destination));
// Number of whole vectors per channel; elements beyond that are left untouched here.
int count = redChannel.Length / Vector256<byte>.Count;
// Permutation control applied to each channel vector before unpacking.
// NOTE(review): PermuteMaskEvenOdd8x32 is declared elsewhere; presumably it pre-arranges the 32-bit
// elements so the per-128-bit-lane unpacks below emit pixels in source order - confirm.
ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes);
ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32);
Vector256<uint> control2 = Unsafe.As<byte, Vector256<uint>>(ref control2Bytes);
// Filler byte (255) interleaved as a temporary fourth component of each pixel.
Vector256<byte> a = Vector256.Create((byte)255);
Vector256<byte> shuffleAlpha = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha));
for (int i = 0; i < count; i++)
{
Vector256<byte> r0 = Unsafe.Add(ref rBase, i);
Vector256<byte> g0 = Unsafe.Add(ref gBase, i);
Vector256<byte> b0 = Unsafe.Add(ref bBase, i);
r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte();
g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte();
b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte();
// Byte-interleave r with g and b with the filler, then interleave the resulting 16-bit pairs
// to obtain 4-byte groups of (r, g, b, 255).
Vector256<byte> rg = Avx2.UnpackLow(r0, g0);
Vector256<byte> b1 = Avx2.UnpackLow(b0, a);
Vector256<byte> rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
Vector256<byte> rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
rg = Avx2.UnpackHigh(r0, g0);
b1 = Avx2.UnpackHigh(b0, a);
Vector256<byte> rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
Vector256<byte> rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
// Within each 128-bit lane, move every fourth byte (the filler) behind the twelve r/g/b bytes.
rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha);
rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha);
rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha);
rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha);
// Gather 32-bit elements 0-2 and 4-6 at the front so the first 24 bytes of each vector are
// contiguous r/g/b data; the filler elements end up in the last 8 bytes.
rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte();
rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte();
rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte();
rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte();
// Store four full vectors at a 24-byte stride: each store's trailing 8 filler bytes are
// overwritten by the next store; the fourth store's tail lands past this iteration's 96 bytes.
ref byte d1 = ref Unsafe.Add(ref dBase, 24 * 4 * i);
ref byte d2 = ref Unsafe.Add(ref d1, 24);
ref byte d3 = ref Unsafe.Add(ref d2, 24);
ref byte d4 = ref Unsafe.Add(ref d3, 24);
Unsafe.As<byte, Vector256<byte>>(ref d1) = rgb1;
Unsafe.As<byte, Vector256<byte>>(ref d2) = rgb2;
Unsafe.As<byte, Vector256<byte>>(ref d3) = rgb3;
Unsafe.As<byte, Vector256<byte>>(ref d4) = rgb4;
}
// Advance every span past the elements handled above.
int slice = count * Vector256<byte>.Count;
redChannel = redChannel.Slice(slice);
greenChannel = greenChannel.Slice(slice);
blueChannel = blueChannel.Slice(slice);
destination = destination.Slice(slice);
}
/// <summary>
/// Interleaves as many whole 32-byte groups of the three planar channels as are available into
/// <paramref name="destination"/> using AVX2, writing 255 as the alpha of every produced pixel,
/// then slices all four spans past the processed elements so the caller can handle what is left.
/// </summary>
/// <param name="redChannel">The red values; sliced to the unprocessed remainder on return.</param>
/// <param name="greenChannel">The green values; sliced to the unprocessed remainder on return.</param>
/// <param name="blueChannel">The blue values; sliced to the unprocessed remainder on return.</param>
/// <param name="destination">The destination pixels; sliced to the unprocessed remainder on return.</param>
internal static void PackFromRgbPlanesAvx2Reduce(
    ref ReadOnlySpan<byte> redChannel,
    ref ReadOnlySpan<byte> greenChannel,
    ref ReadOnlySpan<byte> blueChannel,
    ref Span<Rgba32> destination)
{
    // View the channel data and the destination as 256-bit vectors.
    ref Vector256<byte> rBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(redChannel));
    ref Vector256<byte> gBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(greenChannel));
    ref Vector256<byte> bBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(blueChannel));
    ref Vector256<byte> dBase = ref Unsafe.As<Rgba32, Vector256<byte>>(ref MemoryMarshal.GetReference(destination));

    // Number of whole vectors per channel; elements beyond that are left untouched here.
    int count = redChannel.Length / Vector256<byte>.Count;

    // Permutation control applied to each channel vector before unpacking.
    // NOTE(review): PermuteMaskEvenOdd8x32 is declared elsewhere; presumably it pre-arranges the 32-bit
    // elements so the per-128-bit-lane unpacks below emit pixels in source order - confirm.
    ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
    Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes);

    // Opaque alpha for every output pixel. Unlike the Rgb24 overload, the alpha byte stays in place,
    // so no shuffle/permute masks for relocating it are needed here.
    Vector256<byte> a = Vector256.Create((byte)255);

    for (int i = 0; i < count; i++)
    {
        Vector256<byte> r0 = Unsafe.Add(ref rBase, i);
        Vector256<byte> g0 = Unsafe.Add(ref gBase, i);
        Vector256<byte> b0 = Unsafe.Add(ref bBase, i);

        r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte();
        g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte();
        b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte();

        // Byte-interleave r with g and b with alpha, then interleave the resulting 16-bit pairs
        // to obtain 4-byte groups of (r, g, b, 255).
        Vector256<byte> rg = Avx2.UnpackLow(r0, g0);
        Vector256<byte> b1 = Avx2.UnpackLow(b0, a);

        Vector256<byte> rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
        Vector256<byte> rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();

        rg = Avx2.UnpackHigh(r0, g0);
        b1 = Avx2.UnpackHigh(b0, a);

        Vector256<byte> rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
        Vector256<byte> rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();

        // Four back-to-back vector stores per iteration.
        ref Vector256<byte> d0 = ref Unsafe.Add(ref dBase, i * 4);
        d0 = rgb1;
        Unsafe.Add(ref d0, 1) = rgb2;
        Unsafe.Add(ref d0, 2) = rgb3;
        Unsafe.Add(ref d0, 3) = rgb4;
    }

    // Advance every span past the elements handled above.
    int slice = count * Vector256<byte>.Count;
    redChannel = redChannel.Slice(slice);
    greenChannel = greenChannel.Slice(slice);
    blueChannel = blueChannel.Slice(slice);
    destination = destination.Slice(slice);
}
}
}
}

206
src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs

@ -0,0 +1,206 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
/// <summary>
/// Packs three planar byte channels into <see cref="Rgb24"/> pixels. The bulk of the data is handled by
/// the AVX2 routine when available (otherwise by the batched scalar routine); leftovers are packed one by one.
/// </summary>
/// <param name="configuration">The configuration (not used by this method).</param>
/// <param name="redChannel">The red values.</param>
/// <param name="greenChannel">The green values; expected to match <paramref name="redChannel"/> in length.</param>
/// <param name="blueChannel">The blue values; expected to match <paramref name="redChannel"/> in length.</param>
/// <param name="destination">The destination pixels; expected to be at least 3 elements longer than the channels.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void PackFromRgbPlanes(
    Configuration configuration,
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgb24> destination)
{
    DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!");
    DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!");
    DebugGuard.IsTrue(destination.Length > redChannel.Length + 2, nameof(destination), "'destination' must contain a padding of 3 elements!");

    // The reduce step slices all four spans down to whatever it did not process.
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
    }
    else
    {
        PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
    }
#else
    PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
#endif

    PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination);
}
/// <summary>
/// Packs three planar byte channels into <see cref="Rgba32"/> pixels. The bulk of the data is handled by
/// the AVX2 routine when available (otherwise by the batched scalar routine); leftovers are packed one by one.
/// </summary>
/// <param name="configuration">The configuration (not used by this method).</param>
/// <param name="redChannel">The red values.</param>
/// <param name="greenChannel">The green values; expected to match <paramref name="redChannel"/> in length.</param>
/// <param name="blueChannel">The blue values; expected to match <paramref name="redChannel"/> in length.</param>
/// <param name="destination">The destination pixels; the check below requires it to be strictly longer than the channels.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void PackFromRgbPlanes(
    Configuration configuration,
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgba32> destination)
{
    DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!");
    DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!");
    DebugGuard.IsTrue(destination.Length > redChannel.Length, nameof(destination), "'destination' span should not be shorter than the source channels!");

    // The reduce step slices all four spans down to whatever it did not process.
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
    }
    else
    {
        PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
    }
#else
    PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
#endif

    PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination);
}
/// <summary>
/// Packs the channels into <paramref name="destination"/> four pixels per step without SIMD,
/// then slices all four spans past the processed elements. Up to three trailing elements are left for the caller.
/// </summary>
/// <param name="redChannel">The red values; sliced to the unprocessed remainder on return.</param>
/// <param name="greenChannel">The green values; sliced to the unprocessed remainder on return.</param>
/// <param name="blueChannel">The blue values; sliced to the unprocessed remainder on return.</param>
/// <param name="destination">The destination pixels; sliced to the unprocessed remainder on return.</param>
private static void PackFromRgbPlanesScalarBatchedReduce(
    ref ReadOnlySpan<byte> redChannel,
    ref ReadOnlySpan<byte> greenChannel,
    ref ReadOnlySpan<byte> blueChannel,
    ref Span<Rgb24> destination)
{
    // Read each channel four bytes at a time.
    ref ByteTuple4 redBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(redChannel));
    ref ByteTuple4 greenBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(greenChannel));
    ref ByteTuple4 blueBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(blueChannel));
    ref Rgb24 pixelBase = ref MemoryMarshal.GetReference(destination);

    int batches = redChannel.Length / 4;

    for (int batch = 0; batch < batches; batch++)
    {
        ref ByteTuple4 red = ref Unsafe.Add(ref redBase, batch);
        ref ByteTuple4 green = ref Unsafe.Add(ref greenBase, batch);
        ref ByteTuple4 blue = ref Unsafe.Add(ref blueBase, batch);

        ref Rgb24 p0 = ref Unsafe.Add(ref pixelBase, batch * 4);
        ref Rgb24 p1 = ref Unsafe.Add(ref p0, 1);
        ref Rgb24 p2 = ref Unsafe.Add(ref p0, 2);
        ref Rgb24 p3 = ref Unsafe.Add(ref p0, 3);

        p0.R = red.V0;
        p0.G = green.V0;
        p0.B = blue.V0;

        p1.R = red.V1;
        p1.G = green.V1;
        p1.B = blue.V1;

        p2.R = red.V2;
        p2.G = green.V2;
        p2.B = blue.V2;

        p3.R = red.V3;
        p3.G = green.V3;
        p3.B = blue.V3;
    }

    // Advance every span past the elements handled above.
    int processed = batches * 4;
    redChannel = redChannel.Slice(processed);
    greenChannel = greenChannel.Slice(processed);
    blueChannel = blueChannel.Slice(processed);
    destination = destination.Slice(processed);
}
/// <summary>
/// Packs the channels into <paramref name="destination"/> four pixels per step without SIMD,
/// then slices all four spans past the processed elements. The whole destination is first filled
/// with opaque black so that every pixel ends up with an alpha of 255.
/// </summary>
/// <param name="redChannel">The red values; sliced to the unprocessed remainder on return.</param>
/// <param name="greenChannel">The green values; sliced to the unprocessed remainder on return.</param>
/// <param name="blueChannel">The blue values; sliced to the unprocessed remainder on return.</param>
/// <param name="destination">The destination pixels; sliced to the unprocessed remainder on return.</param>
private static void PackFromRgbPlanesScalarBatchedReduce(
    ref ReadOnlySpan<byte> redChannel,
    ref ReadOnlySpan<byte> greenChannel,
    ref ReadOnlySpan<byte> blueChannel,
    ref Span<Rgba32> destination)
{
    // Read each channel four bytes at a time.
    ref ByteTuple4 redBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(redChannel));
    ref ByteTuple4 greenBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(greenChannel));
    ref ByteTuple4 blueBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(blueChannel));
    ref Rgba32 pixelBase = ref MemoryMarshal.GetReference(destination);

    int batches = redChannel.Length / 4;

    // Only R, G and B are assigned in the loop; the alpha comes from this fill.
    destination.Fill(new Rgba32(0, 0, 0, 255));

    for (int batch = 0; batch < batches; batch++)
    {
        ref ByteTuple4 red = ref Unsafe.Add(ref redBase, batch);
        ref ByteTuple4 green = ref Unsafe.Add(ref greenBase, batch);
        ref ByteTuple4 blue = ref Unsafe.Add(ref blueBase, batch);

        ref Rgba32 p0 = ref Unsafe.Add(ref pixelBase, batch * 4);
        ref Rgba32 p1 = ref Unsafe.Add(ref p0, 1);
        ref Rgba32 p2 = ref Unsafe.Add(ref p0, 2);
        ref Rgba32 p3 = ref Unsafe.Add(ref p0, 3);

        p0.R = red.V0;
        p0.G = green.V0;
        p0.B = blue.V0;

        p1.R = red.V1;
        p1.G = green.V1;
        p1.B = blue.V1;

        p2.R = red.V2;
        p2.G = green.V2;
        p2.B = blue.V2;

        p3.R = red.V3;
        p3.G = green.V3;
        p3.B = blue.V3;
    }

    // Advance every span past the elements handled above.
    int processed = batches * 4;
    redChannel = redChannel.Slice(processed);
    greenChannel = greenChannel.Slice(processed);
    blueChannel = blueChannel.Slice(processed);
    destination = destination.Slice(processed);
}
/// <summary>
/// Packs the channel values left over after the batched pass into <paramref name="destination"/>,
/// one pixel at a time.
/// </summary>
/// <param name="redChannel">The remaining red values.</param>
/// <param name="greenChannel">The remaining green values; same length as <paramref name="redChannel"/>.</param>
/// <param name="blueChannel">The remaining blue values; same length as <paramref name="redChannel"/>.</param>
/// <param name="destination">The remaining destination pixels; at least as long as the channels.</param>
private static void PackFromRgbPlanesRemainder(
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgb24> destination)
{
    ref byte r = ref MemoryMarshal.GetReference(redChannel);
    ref byte g = ref MemoryMarshal.GetReference(greenChannel);
    ref byte b = ref MemoryMarshal.GetReference(blueChannel);
    ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination);

    // The destination is padded and therefore longer than the channels. Unsafe.Add is not
    // bounds-checked, so the loop must be limited by the source length; iterating up to
    // destination.Length would read past the end of the channel spans.
    int count = redChannel.Length;

    for (int i = 0; i < count; i++)
    {
        ref Rgb24 d = ref Unsafe.Add(ref rgb, i);
        d.R = Unsafe.Add(ref r, i);
        d.G = Unsafe.Add(ref g, i);
        d.B = Unsafe.Add(ref b, i);
    }
}
/// <summary>
/// Packs the channel values left over after the batched pass into <paramref name="destination"/>,
/// one pixel at a time, giving each written pixel an alpha of 255.
/// </summary>
/// <param name="redChannel">The remaining red values.</param>
/// <param name="greenChannel">The remaining green values; same length as <paramref name="redChannel"/>.</param>
/// <param name="blueChannel">The remaining blue values; same length as <paramref name="redChannel"/>.</param>
/// <param name="destination">The remaining destination pixels; at least as long as the channels.</param>
private static void PackFromRgbPlanesRemainder(
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgba32> destination)
{
    ref byte r = ref MemoryMarshal.GetReference(redChannel);
    ref byte g = ref MemoryMarshal.GetReference(greenChannel);
    ref byte b = ref MemoryMarshal.GetReference(blueChannel);
    ref Rgba32 rgba = ref MemoryMarshal.GetReference(destination);

    // The destination is longer than the channels. Unsafe.Add is not bounds-checked, so the
    // loop must be limited by the source length; iterating up to destination.Length would
    // read past the end of the channel spans.
    int count = redChannel.Length;

    for (int i = 0; i < count; i++)
    {
        ref Rgba32 d = ref Unsafe.Add(ref rgba, i);
        d.R = Unsafe.Add(ref r, i);
        d.G = Unsafe.Add(ref g, i);
        d.B = Unsafe.Add(ref b, i);
        d.A = 255;
    }
}
}
}

9
src/ImageSharp/Common/Helpers/SimdUtils.cs

@ -6,6 +6,7 @@ using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
@ -220,5 +221,13 @@ namespace SixLabors.ImageSharp
nameof(source),
$"length should be divisible by {shouldBeDivisibleBy}!");
}
/// <summary>
/// Four bytes addressable as individual fields. The batched scalar packing routines reinterpret
/// channel data as this struct (via <c>Unsafe.As</c>) to read four consecutive channel values per step.
/// </summary>
private struct ByteTuple4
{
// Declaration order is significant: V0..V3 must map to increasing byte offsets.
// NOTE(review): this relies on the default sequential struct layout - confirm before reordering.
public byte V0;
public byte V1;
public byte V2;
public byte V3;
}
}
}

10
src/ImageSharp/ImageSharp.csproj

@ -24,16 +24,16 @@
</ItemGroup>
<ItemGroup Condition=" $(TargetFramework.StartsWith('netstandard')) OR '$(TargetFramework)' == 'net472'">
<PackageReference Include="System.Numerics.Vectors" Version="4.5.0"/>
<PackageReference Include="System.Numerics.Vectors" Version="4.5.0" />
<PackageReference Include="System.Buffers" Version="4.5.1" />
<PackageReference Include="System.Memory" Version="4.5.4"/>
<PackageReference Include="System.Memory" Version="4.5.4" />
</ItemGroup>
<ItemGroup Condition=" '$(TargetFramework)' == 'netstandard1.3'">
<PackageReference Include="System.IO.Compression" Version="4.3.0"/>
<PackageReference Include="System.IO.Compression" Version="4.3.0" />
<PackageReference Include="System.IO.UnmanagedMemoryStream" Version="4.3.0" />
<PackageReference Include="System.Threading.Tasks.Parallel" Version="4.3.0"/>
<PackageReference Include="System.ValueTuple" Version="4.5.0"/>
<PackageReference Include="System.Threading.Tasks.Parallel" Version="4.3.0" />
<PackageReference Include="System.ValueTuple" Version="4.5.0" />
</ItemGroup>
<ItemGroup>

7
src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs

@ -53,8 +53,13 @@ namespace SixLabors.ImageSharp.Memory
{
ThrowObjectDisposedException();
}
#if SUPPORTS_CREATESPAN
ref byte r0 = ref MemoryMarshal.GetReference<byte>(this.Data);
return MemoryMarshal.CreateSpan(ref Unsafe.As<byte, T>(ref r0), this.length);
#else
return MemoryMarshal.Cast<byte, T>(this.Data.AsSpan()).Slice(0, this.length);
#endif
}
/// <inheritdoc />

17
src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs

@ -21,6 +21,23 @@ namespace SixLabors.ImageSharp.PixelFormats
/// <inheritdoc />
public override PixelTypeInfo GetPixelTypeInfo() => LazyInfo.Value;
/// <inheritdoc />
internal override void PackFromRgbPlanes(
    Configuration configuration,
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgb24> destination)
{
    Guard.NotNull(configuration, nameof(configuration));

    // All channels must agree in length and the destination needs 3 extra elements of padding.
    int channelLength = redChannel.Length;
    Guard.IsTrue(greenChannel.Length == channelLength, nameof(greenChannel), "Channels must be of same size!");
    Guard.IsTrue(blueChannel.Length == channelLength, nameof(blueChannel), "Channels must be of same size!");
    Guard.IsTrue(destination.Length > channelLength + 2, nameof(destination), "'destination' must contain a padding of 3 elements!");

    // Validated: hand over to the shared packing implementation.
    SimdUtils.PackFromRgbPlanes(configuration, redChannel, greenChannel, blueChannel, destination);
}
}
}
}

17
src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs

@ -56,6 +56,23 @@ namespace SixLabors.ImageSharp.PixelFormats
MemoryMarshal.Cast<Vector4, float>(sourceVectors),
MemoryMarshal.Cast<Rgba32, byte>(destinationPixels));
}
/// <inheritdoc />
internal override void PackFromRgbPlanes(
    Configuration configuration,
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgba32> destination)
{
    Guard.NotNull(configuration, nameof(configuration));

    // All channels must agree in length; the destination check is strict, so a destination
    // of exactly the channel length is rejected.
    int channelLength = redChannel.Length;
    Guard.IsTrue(greenChannel.Length == channelLength, nameof(greenChannel), "Channels must be of same size!");
    Guard.IsTrue(blueChannel.Length == channelLength, nameof(blueChannel), "Channels must be of same size!");
    Guard.IsTrue(destination.Length > channelLength, nameof(destination), "'destination' span should not be shorter than the source channels!");

    // Validated: hand over to the shared packing implementation.
    SimdUtils.PackFromRgbPlanes(configuration, redChannel, greenChannel, blueChannel, destination);
}
}
}
}

42
src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs

@ -4,6 +4,8 @@
using System;
using System.Buffers;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Formats;
using SixLabors.ImageSharp.Memory;
@ -159,5 +161,45 @@ namespace SixLabors.ImageSharp.PixelFormats
PixelOperations<TDestinationPixel>.Instance.From(configuration, sourcePixels, destinationPixels);
}
/// <summary>
/// Bulk operation that packs 3 separate RGB channels into <paramref name="destination"/>.
/// The destination must be at least 3 elements longer than the channels (padding).
/// </summary>
/// <param name="configuration">A <see cref="Configuration"/> to configure internal operations.</param>
/// <param name="redChannel">A <see cref="ReadOnlySpan{T}"/> to the red values.</param>
/// <param name="greenChannel">A <see cref="ReadOnlySpan{T}"/> to the green values.</param>
/// <param name="blueChannel">A <see cref="ReadOnlySpan{T}"/> to the blue values.</param>
/// <param name="destination">A <see cref="Span{T}"/> to the destination pixels.</param>
internal virtual void PackFromRgbPlanes(
    Configuration configuration,
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<TPixel> destination)
{
    Guard.NotNull(configuration, nameof(configuration));

    int count = redChannel.Length;
    Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!");
    Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!");
    Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!");
    Guard.DestinationShouldNotBeTooShort(redChannel, destination, nameof(destination));

    ref TPixel destinationBase = ref MemoryMarshal.GetReference(destination);
    ref byte redBase = ref MemoryMarshal.GetReference(redChannel);
    ref byte greenBase = ref MemoryMarshal.GetReference(greenChannel);
    ref byte blueBase = ref MemoryMarshal.GetReference(blueChannel);

    // Generic fallback: build an Rgb24 per element and let the pixel type convert from it.
    Rgb24 pixel = default;
    int index = 0;
    while (index < count)
    {
        pixel.R = Unsafe.Add(ref redBase, index);
        pixel.G = Unsafe.Add(ref greenBase, index);
        pixel.B = Unsafe.Add(ref blueBase, index);

        Unsafe.Add(ref destinationBase, index).FromRgb24(pixel);
        index++;
    }
}
}
}

16
src/ImageSharp/Primitives/DenseMatrix{T}.cs

@ -109,7 +109,7 @@ namespace SixLabors.ImageSharp
/// <returns>The <see typeparam="T"/> at the specified position.</returns>
public ref T this[int row, int column]
{
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
this.CheckCoordinates(row, column);
@ -124,7 +124,7 @@ namespace SixLabors.ImageSharp
/// <returns>
/// The <see cref="DenseMatrix{T}"/> representation on the source data.
/// </returns>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static implicit operator DenseMatrix<T>(T[,] data) => new DenseMatrix<T>(data);
/// <summary>
@ -134,7 +134,7 @@ namespace SixLabors.ImageSharp
/// <returns>
/// The <see cref="T:T[,]"/> representation on the source data.
/// </returns>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
#pragma warning disable SA1008 // Opening parenthesis should be spaced correctly
public static implicit operator T[,](in DenseMatrix<T> data)
#pragma warning restore SA1008 // Opening parenthesis should be spaced correctly
@ -175,7 +175,7 @@ namespace SixLabors.ImageSharp
/// Transposes the rows and columns of the dense matrix.
/// </summary>
/// <returns>The <see cref="DenseMatrix{T}"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public DenseMatrix<T> Transpose()
{
var result = new DenseMatrix<T>(this.Rows, this.Columns);
@ -196,13 +196,13 @@ namespace SixLabors.ImageSharp
/// Fills the matrix with the given value
/// </summary>
/// <param name="value">The value to fill each item with</param>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Fill(T value) => this.Span.Fill(value);
/// <summary>
/// Clears the matrix setting each value to the default value for the element type
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Clear() => this.Span.Clear();
/// <summary>
@ -232,14 +232,14 @@ namespace SixLabors.ImageSharp
=> obj is DenseMatrix<T> other && this.Equals(other);
/// <inheritdoc/>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool Equals(DenseMatrix<T> other) =>
this.Columns == other.Columns
&& this.Rows == other.Rows
&& this.Span.SequenceEqual(other.Span);
/// <inheritdoc/>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public override int GetHashCode()
{
HashCode code = default;

116
src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs

@ -1,10 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
@ -43,12 +40,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
}
/// <summary>
/// Gets the horizontal gradient operator.
/// Gets the horizontal convolution kernel.
/// </summary>
public DenseMatrix<float> KernelX { get; }
/// <summary>
/// Gets the vertical gradient operator.
/// Gets the vertical convolution kernel.
/// </summary>
public DenseMatrix<float> KernelY { get; }
@ -60,102 +57,39 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
/// <inheritdoc/>
protected override void OnFrameApply(ImageFrame<TPixel> source)
{
using Buffer2D<TPixel> targetPixels = this.Configuration.MemoryAllocator.Allocate2D<TPixel>(source.Width, source.Height);
MemoryAllocator allocator = this.Configuration.MemoryAllocator;
using Buffer2D<TPixel> targetPixels = allocator.Allocate2D<TPixel>(source.Width, source.Height);
source.CopyTo(targetPixels);
var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds());
var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, this.KernelY, this.KernelX, this.Configuration, this.PreserveAlpha);
ParallelRowIterator.IterateRows<RowOperation, Vector4>(
this.Configuration,
interest,
in operation);
// We use a rectangle 3x the interest width to allocate a buffer big enough
// for source and target bulk pixel conversion.
var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 3, interest.Height);
Buffer2D<TPixel>.SwapOrCopyContent(source.PixelBuffer, targetPixels);
}
/// <summary>
/// A <see langword="struct"/> implementing the convolution logic for <see cref="Convolution2DProcessor{T}"/>.
/// </summary>
private readonly struct RowOperation : IRowOperation<Vector4>
{
private readonly Rectangle bounds;
private readonly int maxY;
private readonly int maxX;
private readonly Buffer2D<TPixel> targetPixels;
private readonly Buffer2D<TPixel> sourcePixels;
private readonly DenseMatrix<float> kernelY;
private readonly DenseMatrix<float> kernelX;
private readonly Configuration configuration;
private readonly bool preserveAlpha;
[MethodImpl(InliningOptions.ShortMethod)]
public RowOperation(
Rectangle bounds,
Buffer2D<TPixel> targetPixels,
Buffer2D<TPixel> sourcePixels,
DenseMatrix<float> kernelY,
DenseMatrix<float> kernelX,
Configuration configuration,
bool preserveAlpha)
{
this.bounds = bounds;
this.maxY = this.bounds.Bottom - 1;
this.maxX = this.bounds.Right - 1;
this.targetPixels = targetPixels;
this.sourcePixels = sourcePixels;
this.kernelY = kernelY;
this.kernelX = kernelX;
this.configuration = configuration;
this.preserveAlpha = preserveAlpha;
}
/// <inheritdoc/>
[MethodImpl(InliningOptions.ShortMethod)]
public void Invoke(int y, Span<Vector4> span)
using (var map = new KernelSamplingMap(allocator))
{
ref Vector4 spanRef = ref MemoryMarshal.GetReference(span);
Span<TPixel> targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span);
// Since the kernel sizes are identical we can use a single map.
map.BuildSamplingOffsetMap(this.KernelY, interest);
if (this.preserveAlpha)
{
for (int x = 0; x < this.bounds.Width; x++)
{
DenseMatrixUtils.Convolve2D3(
in this.kernelY,
in this.kernelX,
this.sourcePixels,
ref spanRef,
y,
x,
this.bounds.Y,
this.maxY,
this.bounds.X,
this.maxX);
}
}
else
{
for (int x = 0; x < this.bounds.Width; x++)
{
DenseMatrixUtils.Convolve2D4(
in this.kernelY,
in this.kernelX,
this.sourcePixels,
ref spanRef,
y,
x,
this.bounds.Y,
this.maxY,
this.bounds.X,
this.maxX);
}
}
var operation = new Convolution2DRowOperation<TPixel>(
interest,
targetPixels,
source.PixelBuffer,
map,
this.KernelY,
this.KernelX,
this.Configuration,
this.PreserveAlpha);
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan);
ParallelRowIterator.IterateRows<Convolution2DRowOperation<TPixel>, Vector4>(
this.Configuration,
operationBounds,
in operation);
}
Buffer2D<TPixel>.SwapOrCopyContent(source.PixelBuffer, targetPixels);
}
}
}

193
src/ImageSharp/Processing/Processors/Convolution/Convolution2DRowOperation{TPixel}.cs

@ -0,0 +1,193 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Processing.Processors.Convolution
{
/// <summary>
/// A <see langword="struct"/> implementing the logic for 2D convolution.
/// </summary>
/// <remarks>
/// For every row the source is convolved with both kernels and the per-component
/// magnitude <c>sqrt((x * x) + (y * y))</c> of the two responses is written to the target.
/// The working span passed to <see cref="Invoke"/> is sliced into three consecutive
/// segments of the bounds width: a source row buffer followed by one accumulator per kernel.
/// </remarks>
internal readonly struct Convolution2DRowOperation<TPixel> : IRowOperation<Vector4>
    where TPixel : unmanaged, IPixel<TPixel>
{
    private readonly Rectangle bounds;
    private readonly Buffer2D<TPixel> targetPixels;
    private readonly Buffer2D<TPixel> sourcePixels;
    private readonly KernelSamplingMap map;
    private readonly DenseMatrix<float> kernelMatrixY;
    private readonly DenseMatrix<float> kernelMatrixX;
    private readonly Configuration configuration;
    private readonly bool preserveAlpha;

    /// <summary>
    /// Initializes a new instance of the <see cref="Convolution2DRowOperation{TPixel}"/> struct.
    /// </summary>
    /// <param name="bounds">The region of the image that is processed.</param>
    /// <param name="targetPixels">The buffer the convolved rows are written to.</param>
    /// <param name="sourcePixels">The buffer the samples are read from.</param>
    /// <param name="map">The precalculated row/column sampling offsets for the kernels.</param>
    /// <param name="kernelMatrixY">The vertical convolution kernel.</param>
    /// <param name="kernelMatrixX">The horizontal convolution kernel.</param>
    /// <param name="configuration">The configuration used for bulk pixel conversion.</param>
    /// <param name="preserveAlpha">Whether the source alpha is copied to the result instead of being convolved.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Convolution2DRowOperation(
        Rectangle bounds,
        Buffer2D<TPixel> targetPixels,
        Buffer2D<TPixel> sourcePixels,
        KernelSamplingMap map,
        DenseMatrix<float> kernelMatrixY,
        DenseMatrix<float> kernelMatrixX,
        Configuration configuration,
        bool preserveAlpha)
    {
        this.bounds = bounds;
        this.targetPixels = targetPixels;
        this.sourcePixels = sourcePixels;
        this.map = map;
        this.kernelMatrixY = kernelMatrixY;
        this.kernelMatrixX = kernelMatrixX;
        this.configuration = configuration;
        this.preserveAlpha = preserveAlpha;
    }

    /// <inheritdoc/>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Invoke(int y, Span<Vector4> span)
    {
        if (this.preserveAlpha)
        {
            this.Convolve3(y, span);
        }
        else
        {
            this.Convolve4(y, span);
        }
    }

    /// <summary>
    /// Convolves a single row and replaces the alpha component of every result
    /// with the alpha of the source pixel at the same position.
    /// </summary>
    /// <param name="y">The row to process.</param>
    /// <param name="span">The working buffer; sliced into three segments of the bounds width.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void Convolve3(int y, Span<Vector4> span)
    {
        // Span is 3x bounds: [source row | Y accumulator | X accumulator].
        int boundsX = this.bounds.X;
        int boundsWidth = this.bounds.Width;
        Span<Vector4> sourceBuffer = span.Slice(0, boundsWidth);
        Span<Vector4> targetYBuffer = span.Slice(boundsWidth, boundsWidth);
        Span<Vector4> targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth);

        var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map);
        ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);

        // Clear the target buffers for each row run.
        targetYBuffer.Clear();
        targetXBuffer.Clear();
        ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer);
        ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer);

        // The source buffer is refilled for every kernel row but never moves,
        // so its base reference only has to be resolved once.
        ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);

        ReadOnlyKernel kernelY = state.KernelY;
        ReadOnlyKernel kernelX = state.KernelX;
        Span<TPixel> sourceRow;

        for (int kY = 0; kY < kernelY.Rows; kY++)
        {
            // Get the precalculated source sample row for this kernel row and copy to our buffer.
            int sampleY = Unsafe.Add(ref sampleRowBase, kY);
            sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
            PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);

            for (int x = 0; x < sourceBuffer.Length; x++)
            {
                ref int sampleColumnBase = ref state.GetSampleColumn(x);
                ref Vector4 targetY = ref Unsafe.Add(ref targetBaseY, x);
                ref Vector4 targetX = ref Unsafe.Add(ref targetBaseX, x);

                for (int kX = 0; kX < kernelY.Columns; kX++)
                {
                    // The map stores absolute image columns; rebase them onto the sliced row buffer.
                    int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
                    Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);

                    // Each accumulator sums the response of its own kernel. The combined
                    // magnitude below is symmetric in X and Y, so the output is the same
                    // whichever accumulator holds which response.
                    targetY += kernelY[kY, kX] * sample;
                    targetX += kernelX[kY, kX] * sample;
                }
            }
        }

        // Now we need to combine the values and copy the original alpha values
        // from the source row.
        sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
        PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);

        for (int x = 0; x < sourceRow.Length; x++)
        {
            ref Vector4 target = ref Unsafe.Add(ref targetBaseY, x);
            Vector4 vectorY = target;
            Vector4 vectorX = Unsafe.Add(ref targetBaseX, x);

            target = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY));
            target.W = Unsafe.Add(ref sourceBase, x).W;
        }

        Span<TPixel> targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
        PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRowSpan);
    }

    /// <summary>
    /// Convolves all four components of a single row. Source samples are passed through
    /// <c>Numerics.Premultiply</c> before accumulation and the combined result through
    /// <c>Numerics.UnPremultiply</c> before it is written to the target.
    /// </summary>
    /// <param name="y">The row to process.</param>
    /// <param name="span">The working buffer; sliced into three segments of the bounds width.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void Convolve4(int y, Span<Vector4> span)
    {
        // Span is 3x bounds: [source row | Y accumulator | X accumulator].
        int boundsX = this.bounds.X;
        int boundsWidth = this.bounds.Width;
        Span<Vector4> sourceBuffer = span.Slice(0, boundsWidth);
        Span<Vector4> targetYBuffer = span.Slice(boundsWidth, boundsWidth);
        Span<Vector4> targetXBuffer = span.Slice(boundsWidth * 2, boundsWidth);

        var state = new Convolution2DState(in this.kernelMatrixY, in this.kernelMatrixX, this.map);
        ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);

        // Clear the target buffers for each row run.
        targetYBuffer.Clear();
        targetXBuffer.Clear();
        ref Vector4 targetBaseY = ref MemoryMarshal.GetReference(targetYBuffer);
        ref Vector4 targetBaseX = ref MemoryMarshal.GetReference(targetXBuffer);

        // The source buffer is refilled for every kernel row but never moves,
        // so its base reference only has to be resolved once.
        ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);

        ReadOnlyKernel kernelY = state.KernelY;
        ReadOnlyKernel kernelX = state.KernelX;

        for (int kY = 0; kY < kernelY.Rows; kY++)
        {
            // Get the precalculated source sample row for this kernel row and copy to our buffer.
            int sampleY = Unsafe.Add(ref sampleRowBase, kY);
            Span<TPixel> sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
            PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);

            Numerics.Premultiply(sourceBuffer);

            for (int x = 0; x < sourceBuffer.Length; x++)
            {
                ref int sampleColumnBase = ref state.GetSampleColumn(x);
                ref Vector4 targetY = ref Unsafe.Add(ref targetBaseY, x);
                ref Vector4 targetX = ref Unsafe.Add(ref targetBaseX, x);

                for (int kX = 0; kX < kernelY.Columns; kX++)
                {
                    // The map stores absolute image columns; rebase them onto the sliced row buffer.
                    int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
                    Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);

                    // Each accumulator sums the response of its own kernel (see Convolve3).
                    targetY += kernelY[kY, kX] * sample;
                    targetX += kernelX[kY, kX] * sample;
                }
            }
        }

        // Now we need to combine the values
        for (int x = 0; x < targetYBuffer.Length; x++)
        {
            ref Vector4 target = ref Unsafe.Add(ref targetBaseY, x);
            Vector4 vectorY = target;
            Vector4 vectorX = Unsafe.Add(ref targetBaseX, x);

            target = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY));
        }

        Numerics.UnPremultiply(targetYBuffer);

        Span<TPixel> targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
        PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetYBuffer, targetRow);
    }
}
}

54
src/ImageSharp/Processing/Processors/Convolution/Convolution2DState.cs

@ -0,0 +1,54 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp.Processing.Processors.Convolution
{
/// <summary>
/// A stack only struct that caches both kernels and the precalculated sampling offset
/// spans so 2D convolution loops avoid repeated reference indirection.
/// </summary>
internal readonly ref struct Convolution2DState
{
    private readonly Span<int> rowOffsets;
    private readonly Span<int> columnOffsets;
    private readonly int rowStride;
    private readonly int columnStride;

    /// <summary>
    /// Initializes a new instance of the <see cref="Convolution2DState"/> struct.
    /// </summary>
    /// <param name="kernelY">The vertical convolution kernel.</param>
    /// <param name="kernelX">The horizontal convolution kernel.</param>
    /// <param name="map">The map providing the row and column sampling offsets.</param>
    public Convolution2DState(
        in DenseMatrix<float> kernelY,
        in DenseMatrix<float> kernelX,
        KernelSamplingMap map)
    {
        // We check the kernels are the same size upstream, so the dimensions
        // of the Y kernel are used as the stride for both offset maps.
        this.KernelY = new ReadOnlyKernel(kernelY);
        this.KernelX = new ReadOnlyKernel(kernelX);
        this.rowStride = kernelY.Rows;
        this.columnStride = kernelY.Columns;
        this.rowOffsets = map.GetRowOffsetSpan();
        this.columnOffsets = map.GetColumnOffsetSpan();
    }

    /// <summary>
    /// Gets the vertical kernel.
    /// </summary>
    public readonly ReadOnlyKernel KernelY
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get;
    }

    /// <summary>
    /// Gets the horizontal kernel.
    /// </summary>
    public readonly ReadOnlyKernel KernelX
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get;
    }

    /// <summary>
    /// Gets a reference to the first sampling offset stored for the given row.
    /// </summary>
    /// <param name="row">The zero-based row index into the offset map.</param>
    /// <returns>A reference to the first of the kernel-height offsets for <paramref name="row"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly ref int GetSampleRow(int row)
    {
        ref int rowMapBase = ref MemoryMarshal.GetReference(this.rowOffsets);
        return ref Unsafe.Add(ref rowMapBase, row * this.rowStride);
    }

    /// <summary>
    /// Gets a reference to the first sampling offset stored for the given column.
    /// </summary>
    /// <param name="column">The zero-based column index into the offset map.</param>
    /// <returns>A reference to the first of the kernel-width offsets for <paramref name="column"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly ref int GetSampleColumn(int column)
    {
        ref int columnMapBase = ref MemoryMarshal.GetReference(this.columnOffsets);
        return ref Unsafe.Add(ref columnMapBase, column * this.columnStride);
    }
}
}

126
src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs

@ -42,12 +42,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
}
/// <summary>
/// Gets the horizontal gradient operator.
/// Gets the horizontal convolution kernel.
/// </summary>
public DenseMatrix<float> KernelX { get; }
/// <summary>
/// Gets the vertical gradient operator.
/// Gets the vertical convolution kernel.
/// </summary>
public DenseMatrix<float> KernelY { get; }
@ -63,96 +63,48 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds());
// Horizontal convolution
var horizontalOperation = new RowOperation(interest, firstPassPixels, source.PixelBuffer, this.KernelX, this.Configuration, this.PreserveAlpha);
ParallelRowIterator.IterateRows<RowOperation, Vector4>(
this.Configuration,
interest,
in horizontalOperation);
// We use a rectangle 2x the interest width to allocate a buffer big enough
// for source and target bulk pixel conversion.
var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height);
// Vertical convolution
var verticalOperation = new RowOperation(interest, source.PixelBuffer, firstPassPixels, this.KernelY, this.Configuration, this.PreserveAlpha);
ParallelRowIterator.IterateRows<RowOperation, Vector4>(
this.Configuration,
interest,
in verticalOperation);
}
/// <summary>
/// A <see langword="struct"/> implementing the convolution logic for <see cref="Convolution2PassProcessor{T}"/>.
/// </summary>
private readonly struct RowOperation : IRowOperation<Vector4>
{
private readonly Rectangle bounds;
private readonly Buffer2D<TPixel> targetPixels;
private readonly Buffer2D<TPixel> sourcePixels;
private readonly DenseMatrix<float> kernel;
private readonly Configuration configuration;
private readonly bool preserveAlpha;
[MethodImpl(InliningOptions.ShortMethod)]
public RowOperation(
Rectangle bounds,
Buffer2D<TPixel> targetPixels,
Buffer2D<TPixel> sourcePixels,
DenseMatrix<float> kernel,
Configuration configuration,
bool preserveAlpha)
using (var mapX = new KernelSamplingMap(this.Configuration.MemoryAllocator))
{
this.bounds = bounds;
this.targetPixels = targetPixels;
this.sourcePixels = sourcePixels;
this.kernel = kernel;
this.configuration = configuration;
this.preserveAlpha = preserveAlpha;
mapX.BuildSamplingOffsetMap(this.KernelX, interest);
// Horizontal convolution
var horizontalOperation = new ConvolutionRowOperation<TPixel>(
interest,
firstPassPixels,
source.PixelBuffer,
mapX,
this.KernelX,
this.Configuration,
this.PreserveAlpha);
ParallelRowIterator.IterateRows<ConvolutionRowOperation<TPixel>, Vector4>(
this.Configuration,
operationBounds,
in horizontalOperation);
}
/// <inheritdoc/>
[MethodImpl(InliningOptions.ShortMethod)]
public void Invoke(int y, Span<Vector4> span)
using (var mapY = new KernelSamplingMap(this.Configuration.MemoryAllocator))
{
ref Vector4 spanRef = ref MemoryMarshal.GetReference(span);
int maxY = this.bounds.Bottom - 1;
int maxX = this.bounds.Right - 1;
Span<TPixel> targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span);
if (this.preserveAlpha)
{
for (int x = 0; x < this.bounds.Width; x++)
{
DenseMatrixUtils.Convolve3(
in this.kernel,
this.sourcePixels,
ref spanRef,
y,
x,
this.bounds.Y,
maxY,
this.bounds.X,
maxX);
}
}
else
{
for (int x = 0; x < this.bounds.Width; x++)
{
DenseMatrixUtils.Convolve4(
in this.kernel,
this.sourcePixels,
ref spanRef,
y,
x,
this.bounds.Y,
maxY,
this.bounds.X,
maxX);
}
}
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan);
mapY.BuildSamplingOffsetMap(this.KernelY, interest);
// Vertical convolution
var verticalOperation = new ConvolutionRowOperation<TPixel>(
interest,
source.PixelBuffer,
firstPassPixels,
mapY,
this.KernelY,
this.Configuration,
this.PreserveAlpha);
ParallelRowIterator.IterateRows<ConvolutionRowOperation<TPixel>, Vector4>(
this.Configuration,
operationBounds,
in verticalOperation);
}
}
}

131
src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessor{TPixel}.cs

@ -39,7 +39,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
}
/// <summary>
/// Gets the 2d gradient operator.
/// Gets the 2d convolution kernel.
/// </summary>
public DenseMatrix<float> KernelXY { get; }
@ -51,16 +51,26 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
/// <inheritdoc/>
protected override void OnFrameApply(ImageFrame<TPixel> source)
{
using Buffer2D<TPixel> targetPixels = this.Configuration.MemoryAllocator.Allocate2D<TPixel>(source.Size());
MemoryAllocator allocator = this.Configuration.MemoryAllocator;
using Buffer2D<TPixel> targetPixels = allocator.Allocate2D<TPixel>(source.Size());
source.CopyTo(targetPixels);
var interest = Rectangle.Intersect(this.SourceRectangle, source.Bounds());
var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, this.KernelXY, this.Configuration, this.PreserveAlpha);
ParallelRowIterator.IterateRows<RowOperation, Vector4>(
this.Configuration,
interest,
in operation);
// We use a rectangle 2x the interest width to allocate a buffer big enough
// for source and target bulk pixel conversion.
var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height);
using (var map = new KernelSamplingMap(allocator))
{
map.BuildSamplingOffsetMap(this.KernelXY, interest);
var operation = new RowOperation(interest, targetPixels, source.PixelBuffer, map, this.KernelXY, this.Configuration, this.PreserveAlpha);
ParallelRowIterator.IterateRows<RowOperation, Vector4>(
this.Configuration,
operationBounds,
in operation);
}
Buffer2D<TPixel>.SwapOrCopyContent(source.PixelBuffer, targetPixels);
}
@ -71,10 +81,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
private readonly struct RowOperation : IRowOperation<Vector4>
{
private readonly Rectangle bounds;
private readonly int maxY;
private readonly int maxX;
private readonly Buffer2D<TPixel> targetPixels;
private readonly Buffer2D<TPixel> sourcePixels;
private readonly KernelSamplingMap map;
private readonly DenseMatrix<float> kernel;
private readonly Configuration configuration;
private readonly bool preserveAlpha;
@ -84,15 +93,15 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
Rectangle bounds,
Buffer2D<TPixel> targetPixels,
Buffer2D<TPixel> sourcePixels,
KernelSamplingMap map,
DenseMatrix<float> kernel,
Configuration configuration,
bool preserveAlpha)
{
this.bounds = bounds;
this.maxY = this.bounds.Bottom - 1;
this.maxX = this.bounds.Right - 1;
this.targetPixels = targetPixels;
this.sourcePixels = sourcePixels;
this.map = map;
this.kernel = kernel;
this.configuration = configuration;
this.preserveAlpha = preserveAlpha;
@ -102,45 +111,93 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
[MethodImpl(InliningOptions.ShortMethod)]
public void Invoke(int y, Span<Vector4> span)
{
ref Vector4 spanRef = ref MemoryMarshal.GetReference(span);
// Span is 2x bounds.
int boundsX = this.bounds.X;
int boundsWidth = this.bounds.Width;
Span<Vector4> sourceBuffer = span.Slice(0, this.bounds.Width);
Span<Vector4> targetBuffer = span.Slice(this.bounds.Width);
ref Vector4 targetRowRef = ref MemoryMarshal.GetReference(span);
Span<TPixel> targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
Span<TPixel> targetRowSpan = this.targetPixels.GetRowSpan(y).Slice(this.bounds.X);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, targetRowSpan.Slice(0, span.Length), span);
var state = new ConvolutionState(in this.kernel, this.map);
int row = y - this.bounds.Y;
ref int sampleRowBase = ref state.GetSampleRow(row);
if (this.preserveAlpha)
{
for (int x = 0; x < this.bounds.Width; x++)
// Clear the target buffer for each row run.
targetBuffer.Clear();
ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);
Span<TPixel> sourceRow;
for (int kY = 0; kY < state.Kernel.Rows; kY++)
{
// Get the precalculated source sample row for this kernel row and copy to our buffer.
int offsetY = Unsafe.Add(ref sampleRowBase, kY);
sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
for (int x = 0; x < sourceBuffer.Length; x++)
{
ref int sampleColumnBase = ref state.GetSampleColumn(x);
ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
for (int kX = 0; kX < state.Kernel.Columns; kX++)
{
int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
Vector4 sample = Unsafe.Add(ref sourceBase, offsetX);
target += state.Kernel[kY, kX] * sample;
}
}
}
// Now we need to copy the original alpha values from the source row.
sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
for (int x = 0; x < sourceRow.Length; x++)
{
DenseMatrixUtils.Convolve3(
in this.kernel,
this.sourcePixels,
ref spanRef,
y,
x,
this.bounds.Y,
this.maxY,
this.bounds.X,
this.maxX);
ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W;
}
}
else
{
for (int x = 0; x < this.bounds.Width; x++)
// Clear the target buffer for each row run.
targetBuffer.Clear();
ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);
for (int kY = 0; kY < state.Kernel.Rows; kY++)
{
DenseMatrixUtils.Convolve4(
in this.kernel,
this.sourcePixels,
ref spanRef,
y,
x,
this.bounds.Y,
this.maxY,
this.bounds.X,
this.maxX);
// Get the precalculated source sample row for this kernel row and copy to our buffer.
int offsetY = Unsafe.Add(ref sampleRowBase, kY);
Span<TPixel> sourceRow = this.sourcePixels.GetRowSpan(offsetY).Slice(boundsX, boundsWidth);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
Numerics.Premultiply(sourceBuffer);
ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
for (int x = 0; x < sourceBuffer.Length; x++)
{
ref int sampleColumnBase = ref state.GetSampleColumn(x);
ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
for (int kX = 0; kX < state.Kernel.Columns; kX++)
{
int offsetX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
Vector4 sample = Unsafe.Add(ref sourceBase, offsetX);
target += state.Kernel[kY, kX] * sample;
}
}
}
Numerics.UnPremultiply(targetBuffer);
}
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan);
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRowSpan);
}
}
}

163
src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs

@ -0,0 +1,163 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Processing.Processors.Convolution
{
/// <summary>
/// A <see langword="struct"/> implementing the logic for 1D convolution.
/// </summary>
/// <remarks>
/// The working span passed to <see cref="Invoke"/> is sliced into two consecutive
/// segments of the bounds width: a source row buffer followed by the target accumulator.
/// </remarks>
internal readonly struct ConvolutionRowOperation<TPixel> : IRowOperation<Vector4>
    where TPixel : unmanaged, IPixel<TPixel>
{
    private readonly Rectangle bounds;
    private readonly Buffer2D<TPixel> targetPixels;
    private readonly Buffer2D<TPixel> sourcePixels;
    private readonly KernelSamplingMap map;
    private readonly DenseMatrix<float> kernelMatrix;
    private readonly Configuration configuration;
    private readonly bool preserveAlpha;

    /// <summary>
    /// Initializes a new instance of the <see cref="ConvolutionRowOperation{TPixel}"/> struct.
    /// </summary>
    /// <param name="bounds">The region of the image that is processed.</param>
    /// <param name="targetPixels">The buffer the convolved rows are written to.</param>
    /// <param name="sourcePixels">The buffer the samples are read from.</param>
    /// <param name="map">The precalculated row/column sampling offsets for the kernel.</param>
    /// <param name="kernelMatrix">The convolution kernel.</param>
    /// <param name="configuration">The configuration used for bulk pixel conversion.</param>
    /// <param name="preserveAlpha">Whether the source alpha is copied to the result instead of being convolved.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ConvolutionRowOperation(
        Rectangle bounds,
        Buffer2D<TPixel> targetPixels,
        Buffer2D<TPixel> sourcePixels,
        KernelSamplingMap map,
        DenseMatrix<float> kernelMatrix,
        Configuration configuration,
        bool preserveAlpha)
    {
        this.bounds = bounds;
        this.targetPixels = targetPixels;
        this.sourcePixels = sourcePixels;
        this.map = map;
        this.kernelMatrix = kernelMatrix;
        this.configuration = configuration;
        this.preserveAlpha = preserveAlpha;
    }

    /// <inheritdoc/>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Invoke(int y, Span<Vector4> span)
    {
        if (this.preserveAlpha)
        {
            this.Convolve3(y, span);
        }
        else
        {
            this.Convolve4(y, span);
        }
    }

    /// <summary>
    /// Convolves a single row and replaces the alpha component of every result
    /// with the alpha of the source pixel at the same position.
    /// </summary>
    /// <param name="y">The row to process.</param>
    /// <param name="span">The working buffer; sliced into two segments of the bounds width.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void Convolve3(int y, Span<Vector4> span)
    {
        // Span is 2x bounds: [source row | target accumulator]. The target is sliced to
        // exactly the bounds width so it always matches the target pixel row length.
        int boundsX = this.bounds.X;
        int boundsWidth = this.bounds.Width;
        Span<Vector4> sourceBuffer = span.Slice(0, boundsWidth);
        Span<Vector4> targetBuffer = span.Slice(boundsWidth, boundsWidth);

        var state = new ConvolutionState(in this.kernelMatrix, this.map);
        ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);

        // Clear the target buffer for each row run.
        targetBuffer.Clear();
        ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);

        // The source buffer is refilled for every kernel row but never moves,
        // so its base reference only has to be resolved once.
        ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);

        ReadOnlyKernel kernel = state.Kernel;
        Span<TPixel> sourceRow;

        for (int kY = 0; kY < kernel.Rows; kY++)
        {
            // Get the precalculated source sample row for this kernel row and copy to our buffer.
            int sampleY = Unsafe.Add(ref sampleRowBase, kY);
            sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
            PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);

            for (int x = 0; x < sourceBuffer.Length; x++)
            {
                ref int sampleColumnBase = ref state.GetSampleColumn(x);
                ref Vector4 target = ref Unsafe.Add(ref targetBase, x);

                for (int kX = 0; kX < kernel.Columns; kX++)
                {
                    // The map stores absolute image columns; rebase them onto the sliced row buffer.
                    int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
                    Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
                    target += kernel[kY, kX] * sample;
                }
            }
        }

        // Now we need to copy the original alpha values from the source row.
        sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
        PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);

        for (int x = 0; x < sourceRow.Length; x++)
        {
            ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
            target.W = Unsafe.Add(ref sourceBase, x).W;
        }

        Span<TPixel> targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
        PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow);
    }

    /// <summary>
    /// Convolves all four components of a single row. Source samples are passed through
    /// <c>Numerics.Premultiply</c> before accumulation and the result through
    /// <c>Numerics.UnPremultiply</c> before it is written to the target.
    /// </summary>
    /// <param name="y">The row to process.</param>
    /// <param name="span">The working buffer; sliced into two segments of the bounds width.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void Convolve4(int y, Span<Vector4> span)
    {
        // Span is 2x bounds: [source row | target accumulator]. The target is sliced to
        // exactly the bounds width so it always matches the target pixel row length.
        int boundsX = this.bounds.X;
        int boundsWidth = this.bounds.Width;
        Span<Vector4> sourceBuffer = span.Slice(0, boundsWidth);
        Span<Vector4> targetBuffer = span.Slice(boundsWidth, boundsWidth);

        var state = new ConvolutionState(in this.kernelMatrix, this.map);
        ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y);

        // Clear the target buffer for each row run.
        targetBuffer.Clear();
        ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);

        // The source buffer is refilled for every kernel row but never moves,
        // so its base reference only has to be resolved once.
        ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);

        ReadOnlyKernel kernel = state.Kernel;

        for (int kY = 0; kY < kernel.Rows; kY++)
        {
            // Get the precalculated source sample row for this kernel row and copy to our buffer.
            int sampleY = Unsafe.Add(ref sampleRowBase, kY);
            Span<TPixel> sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth);
            PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);

            Numerics.Premultiply(sourceBuffer);

            for (int x = 0; x < sourceBuffer.Length; x++)
            {
                ref int sampleColumnBase = ref state.GetSampleColumn(x);
                ref Vector4 target = ref Unsafe.Add(ref targetBase, x);

                for (int kX = 0; kX < kernel.Columns; kX++)
                {
                    // The map stores absolute image columns; rebase them onto the sliced row buffer.
                    int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
                    Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
                    target += kernel[kY, kX] * sample;
                }
            }
        }

        Numerics.UnPremultiply(targetBuffer);

        Span<TPixel> targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
        PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow);
    }
}
}

45
src/ImageSharp/Processing/Processors/Convolution/ConvolutionState.cs

@ -0,0 +1,45 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp.Processing.Processors.Convolution
{
/// <summary>
/// A stack only struct that caches the kernel and the precalculated sampling offset
/// spans so convolution loops avoid repeated reference indirection.
/// </summary>
internal readonly ref struct ConvolutionState
{
    private readonly Span<int> rowOffsets;
    private readonly Span<int> columnOffsets;
    private readonly int rowStride;
    private readonly int columnStride;

    /// <summary>
    /// Initializes a new instance of the <see cref="ConvolutionState"/> struct.
    /// </summary>
    /// <param name="kernel">The convolution kernel.</param>
    /// <param name="map">The map providing the row and column sampling offsets.</param>
    public ConvolutionState(in DenseMatrix<float> kernel, KernelSamplingMap map)
    {
        this.Kernel = new ReadOnlyKernel(kernel);
        this.rowStride = kernel.Rows;
        this.columnStride = kernel.Columns;
        this.rowOffsets = map.GetRowOffsetSpan();
        this.columnOffsets = map.GetColumnOffsetSpan();
    }

    /// <summary>
    /// Gets the kernel.
    /// </summary>
    public readonly ReadOnlyKernel Kernel
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get;
    }

    /// <summary>
    /// Gets a reference to the first sampling offset stored for the given row.
    /// </summary>
    /// <param name="row">The zero-based row index into the offset map.</param>
    /// <returns>A reference to the first of the kernel-height offsets for <paramref name="row"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly ref int GetSampleRow(int row)
    {
        ref int rowMapBase = ref MemoryMarshal.GetReference(this.rowOffsets);
        return ref Unsafe.Add(ref rowMapBase, row * this.rowStride);
    }

    /// <summary>
    /// Gets a reference to the first sampling offset stored for the given column.
    /// </summary>
    /// <param name="column">The zero-based column index into the offset map.</param>
    /// <returns>A reference to the first of the kernel-width offsets for <paramref name="column"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly ref int GetSampleColumn(int column)
    {
        ref int columnMapBase = ref MemoryMarshal.GetReference(this.columnOffsets);
        return ref Unsafe.Add(ref columnMapBase, column * this.columnStride);
    }
}
}

102
src/ImageSharp/Processing/Processors/Convolution/KernelSamplingMap.cs

@ -0,0 +1,102 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Buffers;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Processing.Processors.Convolution
{
/// <summary>
/// Provides a map of the convolution kernel sampling offsets.
/// </summary>
internal sealed class KernelSamplingMap : IDisposable
{
// Allocator used by BuildSamplingOffsetMap to create the offset buffers.
private readonly MemoryAllocator allocator;
// Set by Dispose once the offset buffers have been released.
private bool isDisposed;
// Row (y) sampling offsets; assigned by BuildSamplingOffsetMap.
private IMemoryOwner<int> yOffsets;
// Column (x) sampling offsets; assigned by BuildSamplingOffsetMap.
private IMemoryOwner<int> xOffsets;
/// <summary>
/// Initializes a new instance of the <see cref="KernelSamplingMap"/> class.
/// </summary>
/// <param name="allocator">The memory allocator used to allocate the offset buffers.</param>
public KernelSamplingMap(MemoryAllocator allocator)
{
    this.allocator = allocator;
}
/// <summary>
/// Builds a map of the sampling offsets for the kernel clamped by the given bounds.
/// </summary>
/// <remarks>
/// For every row (and column) of <paramref name="bounds"/> the map stores one absolute
/// source coordinate per kernel row (and column), centred on that position and clamped
/// to the bounds. Calling this method again releases the previously built buffers.
/// </remarks>
/// <param name="kernel">The convolution kernel.</param>
/// <param name="bounds">The source bounds.</param>
public void BuildSamplingOffsetMap(DenseMatrix<float> kernel, Rectangle bounds)
{
    int kernelHeight = kernel.Rows;
    int kernelWidth = kernel.Columns;

    // Release buffers left over from a previous build so repeated calls do not leak them.
    this.yOffsets?.Dispose();
    this.yOffsets = null;
    this.xOffsets?.Dispose();
    this.xOffsets = null;

    this.yOffsets = this.allocator.Allocate<int>(bounds.Height * kernelHeight);
    this.xOffsets = this.allocator.Allocate<int>(bounds.Width * kernelWidth);

    int minY = bounds.Y;
    int maxY = bounds.Bottom - 1;
    int minX = bounds.X;
    int maxX = bounds.Right - 1;
    int radiusY = kernelHeight >> 1;
    int radiusX = kernelWidth >> 1;

    // Calculate the y and x sampling offsets clamped to the given rectangle.
    // While this isn't a hotpath we still dip into unsafe to avoid the span bounds
    // checks as they can potentially be looping over large arrays.
    Span<int> ySpan = this.yOffsets.GetSpan();
    ref int ySpanBase = ref MemoryMarshal.GetReference(ySpan);
    for (int row = 0; row < bounds.Height; row++)
    {
        int rowBase = row * kernelHeight;
        for (int y = 0; y < kernelHeight; y++)
        {
            Unsafe.Add(ref ySpanBase, rowBase + y) = row + y + minY - radiusY;
        }
    }

    // A kernel one row high has a radius of zero, so its offsets are already within the bounds.
    if (kernelHeight > 1)
    {
        Numerics.Clamp(ySpan, minY, maxY);
    }

    Span<int> xSpan = this.xOffsets.GetSpan();
    ref int xSpanBase = ref MemoryMarshal.GetReference(xSpan);
    for (int column = 0; column < bounds.Width; column++)
    {
        int columnBase = column * kernelWidth;
        for (int x = 0; x < kernelWidth; x++)
        {
            Unsafe.Add(ref xSpanBase, columnBase + x) = column + x + minX - radiusX;
        }
    }

    // A kernel one column wide has a radius of zero, so its offsets are already within the bounds.
    if (kernelWidth > 1)
    {
        Numerics.Clamp(xSpan, minX, maxX);
    }
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Span<int> GetRowOffsetSpan() => this.yOffsets.GetSpan();
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Span<int> GetColumnOffsetSpan() => this.xOffsets.GetSpan();
/// <inheritdoc/>
public void Dispose()
{
if (!this.isDisposed)
{
this.yOffsets.Dispose();
this.xOffsets.Dispose();
this.isDisposed = true;
}
}
}
}

63
src/ImageSharp/Processing/Processors/Convolution/ReadOnlyKernel.cs

@ -0,0 +1,63 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp.Processing.Processors.Convolution
{
/// <summary>
/// A stack only, readonly view over the values of a kernel matrix. Coordinates are
/// validated in debug builds only; release builds index without bounds checks.
/// </summary>
internal readonly ref struct ReadOnlyKernel
{
    private readonly ReadOnlySpan<float> values;

    /// <summary>
    /// Initializes a new instance of the <see cref="ReadOnlyKernel"/> struct.
    /// </summary>
    /// <param name="matrix">The matrix whose dimensions and values are captured.</param>
    public ReadOnlyKernel(DenseMatrix<float> matrix)
    {
        this.values = matrix.Span;
        this.Rows = matrix.Rows;
        this.Columns = matrix.Columns;
    }

    /// <summary>
    /// Gets the number of columns in the kernel.
    /// </summary>
    public int Columns
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get;
    }

    /// <summary>
    /// Gets the number of rows in the kernel.
    /// </summary>
    public int Rows
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get;
    }

    /// <summary>
    /// Gets the value at the given row-major coordinates.
    /// </summary>
    /// <param name="row">The zero-based row index.</param>
    /// <param name="column">The zero-based column index.</param>
    public float this[int row, int column]
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            this.CheckCoordinates(row, column);

            // Values are stored row after row, so a row is 'Columns' elements wide.
            int offset = (row * this.Columns) + column;
            return Unsafe.Add(ref MemoryMarshal.GetReference(this.values), offset);
        }
    }

    /// <summary>
    /// Throws when the coordinates fall outside the kernel. Calls are compiled
    /// only when the DEBUG symbol is defined.
    /// </summary>
    [Conditional("DEBUG")]
    private void CheckCoordinates(int row, int column)
    {
        bool rowInRange = row >= 0 && row < this.Rows;
        if (!rowInRange)
        {
            throw new ArgumentOutOfRangeException(nameof(row), row, $"{row} is outwith the matrix bounds.");
        }

        bool columnInRange = column >= 0 && column < this.Columns;
        if (!columnInRange)
        {
            throw new ArgumentOutOfRangeException(nameof(column), column, $"{column} is outwith the matrix bounds.");
        }
    }
}
}

4
src/ImageSharp/Processing/Processors/Filters/FilterProcessor{TPixel}.cs

@ -72,11 +72,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Filters
// Row callback: takes span.Length pixels of row y starting at startX, converts them to
// Vector4 into the supplied buffer, applies the color matrix with ColorNumerics.Transform
// and converts the result back into the same pixel row.
// NOTE(review): this text comes from a diff hunk. The two ToVector4 calls and the two
// FromVector4Destructive calls look like the before/after versions of one line each (the
// second of each pair adds PixelConversionModifiers.Scale) - confirm that only one of each
// pair exists in the real file.
public void Invoke(int y, Span<Vector4> span)
{
Span<TPixel> rowSpan = this.source.GetPixelRowSpan(y).Slice(this.startX, span.Length);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, rowSpan, span);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, rowSpan, span, PixelConversionModifiers.Scale);
// The matrix field is passed by mutable reference via Unsafe.AsRef.
ColorNumerics.Transform(span, ref Unsafe.AsRef(this.matrix));
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, span, rowSpan);
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, span, rowSpan, PixelConversionModifiers.Scale);
}
}
}

8
tests/ImageSharp.Benchmarks/Config.cs

@ -27,6 +27,14 @@ namespace SixLabors.ImageSharp.Benchmarks
}
/// <summary>
/// Benchmark configuration that adds one default job per target runtime:
/// .NET Framework 4.7.2, .NET Core 2.1 and .NET Core 3.1.
/// </summary>
public class MultiFramework : Config
{
    public MultiFramework()
    {
        var net472 = Job.Default.WithRuntime(ClrRuntime.Net472);
        var core21 = Job.Default.WithRuntime(CoreRuntime.Core21);
        var core31 = Job.Default.WithRuntime(CoreRuntime.Core31);

        this.AddJob(net472, core21, core31);
    }
}
public class ShortClr : Config
{
public ShortClr() => this.AddJob(

286
tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs

@ -0,0 +1,286 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion
{
public unsafe class PixelConversion_PackFromRgbPlanes
{
// Byte planes, one value per pixel; Setup allocates each with Count elements.
private byte[] rBuf;
private byte[] gBuf;
private byte[] bBuf;
// Packed destinations; Setup allocates rgbBuf with 3 extra (padding) elements.
private Rgb24[] rgbBuf;
private Rgba32[] rgbaBuf;
// Float planes with Count elements each, and a float destination of Count * 4 elements.
private float[] rFloat;
private float[] gFloat;
private float[] bFloat;
private float[] rgbaFloat;
// Number of elements per plane; the benchmark runs with the single value 1024.
[Params(1024)]
public int Count { get; set; }
/// <summary>
/// Allocates the source planes and destination buffers sized from <see cref="Count"/>.
/// </summary>
[GlobalSetup]
public void Setup()
{
    int length = this.Count;

    // Byte planes and their packed destinations.
    this.rBuf = new byte[length];
    this.gBuf = new byte[length];
    this.bBuf = new byte[length];
    this.rgbBuf = new Rgb24[length + 3]; // padded
    this.rgbaBuf = new Rgba32[length];

    // Float planes and their destination holding four floats per element.
    this.rFloat = new float[length];
    this.gFloat = new float[length];
    this.bFloat = new float[length];
    this.rgbaFloat = new float[length * 4];
}
/// <summary>
/// Packs the three byte planes into <see cref="Rgb24"/> pixels one element at a time
/// through pinned pointers. The benchmark attribute is currently commented out.
/// </summary>
// [Benchmark]
public void Rgb24_Scalar_PerElement_Pinned()
{
    fixed (byte* redPlane = &this.rBuf[0], greenPlane = &this.gBuf[0], bluePlane = &this.bBuf[0])
    fixed (Rgb24* pixels = &this.rgbBuf[0])
    {
        int total = this.Count;
        Rgb24* cursor = pixels;
        for (int index = 0; index < total; index++, cursor++)
        {
            cursor->R = redPlane[index];
            cursor->G = greenPlane[index];
            cursor->B = bluePlane[index];
        }
    }
}
/// <summary>
/// Packs the three byte planes into <see cref="Rgb24"/> pixels one element at a time
/// using bounds-checked span indexing.
/// </summary>
[Benchmark]
public void Rgb24_Scalar_PerElement_Span()
{
    // Each channel reads its own plane so the benchmark touches three distinct
    // source buffers (previously all three aliased the red plane).
    Span<byte> r = this.rBuf;
    Span<byte> g = this.gBuf;
    Span<byte> b = this.bBuf;
    Span<Rgb24> rgb = this.rgbBuf;

    for (int i = 0; i < r.Length; i++)
    {
        ref Rgb24 d = ref rgb[i];
        d.R = r[i];
        d.G = g[i];
        d.B = b[i];
    }
}
/// <summary>
/// Packs the three byte planes into <see cref="Rgb24"/> pixels one element at a time
/// using <see cref="Unsafe.Add{T}(ref T, int)"/> instead of bounds-checked indexing.
/// </summary>
[Benchmark]
public void Rgb24_Scalar_PerElement_Unsafe()
{
    // Each channel reads its own plane so the benchmark touches three distinct
    // source buffers (previously all three aliased the red plane).
    ref byte r = ref this.rBuf[0];
    ref byte g = ref this.gBuf[0];
    ref byte b = ref this.bBuf[0];
    ref Rgb24 rgb = ref this.rgbBuf[0];

    for (int i = 0; i < this.Count; i++)
    {
        ref Rgb24 d = ref Unsafe.Add(ref rgb, i);
        d.R = Unsafe.Add(ref r, i);
        d.G = Unsafe.Add(ref g, i);
        d.B = Unsafe.Add(ref b, i);
    }
}
/// <summary>
/// Packs the three byte planes into <see cref="Rgb24"/> pixels, reading each plane in
/// groups of eight bytes through the <c>Byte8</c> struct. Only <c>Count / 8</c> full
/// groups are processed; any remainder is ignored.
/// </summary>
[Benchmark]
public void Rgb24_Scalar_PerElement_Batched8()
{
    // Each channel reads its own plane so the benchmark touches three distinct
    // source buffers (previously all three aliased the red plane).
    ref Byte8 r = ref Unsafe.As<byte, Byte8>(ref this.rBuf[0]);
    ref Byte8 g = ref Unsafe.As<byte, Byte8>(ref this.gBuf[0]);
    ref Byte8 b = ref Unsafe.As<byte, Byte8>(ref this.bBuf[0]);
    ref Rgb24 rgb = ref this.rgbBuf[0];

    int count = this.Count / 8;
    for (int i = 0; i < count; i++)
    {
        // Eight consecutive destination pixels for this group.
        ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 8);
        ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1);
        ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2);
        ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3);
        ref Rgb24 d4 = ref Unsafe.Add(ref d0, 4);
        ref Rgb24 d5 = ref Unsafe.Add(ref d0, 5);
        ref Rgb24 d6 = ref Unsafe.Add(ref d0, 6);
        ref Rgb24 d7 = ref Unsafe.Add(ref d0, 7);

        // The matching eight-byte group of each plane.
        ref Byte8 rr = ref Unsafe.Add(ref r, i);
        ref Byte8 gg = ref Unsafe.Add(ref g, i);
        ref Byte8 bb = ref Unsafe.Add(ref b, i);

        d0.R = rr.V0;
        d0.G = gg.V0;
        d0.B = bb.V0;

        d1.R = rr.V1;
        d1.G = gg.V1;
        d1.B = bb.V1;

        d2.R = rr.V2;
        d2.G = gg.V2;
        d2.B = bb.V2;

        d3.R = rr.V3;
        d3.G = gg.V3;
        d3.B = bb.V3;

        d4.R = rr.V4;
        d4.G = gg.V4;
        d4.B = bb.V4;

        d5.R = rr.V5;
        d5.G = gg.V5;
        d5.B = bb.V5;

        d6.R = rr.V6;
        d6.G = gg.V6;
        d6.B = bb.V6;

        d7.R = rr.V7;
        d7.G = gg.V7;
        d7.B = bb.V7;
    }
}
/// <summary>
/// Packs the three byte planes into <see cref="Rgb24"/> pixels, reading each plane in
/// groups of four bytes through the <c>Byte4</c> struct. Only <c>Count / 4</c> full
/// groups are processed; any remainder is ignored.
/// </summary>
[Benchmark]
public void Rgb24_Scalar_PerElement_Batched4()
{
    // Each channel reads its own plane so the benchmark touches three distinct
    // source buffers (previously all three aliased the red plane).
    ref Byte4 r = ref Unsafe.As<byte, Byte4>(ref this.rBuf[0]);
    ref Byte4 g = ref Unsafe.As<byte, Byte4>(ref this.gBuf[0]);
    ref Byte4 b = ref Unsafe.As<byte, Byte4>(ref this.bBuf[0]);
    ref Rgb24 rgb = ref this.rgbBuf[0];

    int count = this.Count / 4;
    for (int i = 0; i < count; i++)
    {
        // Four consecutive destination pixels for this group.
        ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4);
        ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1);
        ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2);
        ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3);

        // The matching four-byte group of each plane.
        ref Byte4 rr = ref Unsafe.Add(ref r, i);
        ref Byte4 gg = ref Unsafe.Add(ref g, i);
        ref Byte4 bb = ref Unsafe.Add(ref b, i);

        d0.R = rr.V0;
        d0.G = gg.V0;
        d0.B = bb.V0;

        d1.R = rr.V1;
        d1.G = gg.V1;
        d1.B = bb.V1;

        d2.R = rr.V2;
        d2.G = gg.V2;
        d2.B = bb.V2;

        d3.R = rr.V3;
        d3.G = gg.V3;
        d3.B = bb.V3;
    }
}
#if SUPPORTS_RUNTIME_INTRINSICS
// Baseline benchmark: interleaves the three float planes plus a constant fourth lane
// into rgbaFloat using AVX/AVX2 permute and unpack instructions.
[Benchmark(Baseline = true)]
public void Rgba32_Avx2_Float()
{
// Reinterpret the float arrays as sequences of 256-bit vectors.
ref Vector256<float> rBase = ref Unsafe.As<float, Vector256<float>>(ref this.rFloat[0]);
ref Vector256<float> gBase = ref Unsafe.As<float, Vector256<float>>(ref this.gFloat[0]);
ref Vector256<float> bBase = ref Unsafe.As<float, Vector256<float>>(ref this.bFloat[0]);
ref Vector256<float> resultBase = ref Unsafe.As<float, Vector256<float>>(ref this.rgbaFloat[0]);
// Number of whole vectors per plane; any remainder elements are not processed.
int count = this.Count / Vector256<float>.Count;
// Permutation control loaded from the shared PermuteMaskEvenOdd8x32 mask bytes.
ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
// All lanes 1F; interleaved next to g below, presumably as the alpha channel - confirm.
var va = Vector256.Create(1F);
for (int i = 0; i < count; i++)
{
Vector256<float> r = Unsafe.Add(ref rBase, i);
Vector256<float> g = Unsafe.Add(ref gBase, i);
Vector256<float> b = Unsafe.Add(ref bBase, i);
// Reorder the lanes of each plane with the shared control vector before unpacking.
r = Avx2.PermuteVar8x32(r, vcontrol);
g = Avx2.PermuteVar8x32(g, vcontrol);
b = Avx2.PermuteVar8x32(b, vcontrol);
Vector256<float> vte = Avx.UnpackLow(r, b);
Vector256<float> vto = Avx.UnpackLow(g, va);
// Each source vector produces four destination vectors, hence the i * 4 stride.
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
// Repeat with the high halves for the remaining two destination vectors.
vte = Avx.UnpackHigh(r, b);
vto = Avx.UnpackHigh(g, va);
Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
}
}
/// <summary>
/// Packs the three byte planes into <see cref="Rgb24"/> pixels with
/// <c>SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce</c>.
/// </summary>
[Benchmark]
public void Rgb24_Avx2_Bytes()
{
    // Each channel reads its own plane so the benchmark touches three distinct
    // source buffers (previously all three aliased the red plane).
    ReadOnlySpan<byte> r = this.rBuf;
    ReadOnlySpan<byte> g = this.gBuf;
    ReadOnlySpan<byte> b = this.bBuf;
    Span<Rgb24> rgb = this.rgbBuf;

    SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb);
}
/// <summary>
/// Packs the three byte planes into <see cref="Rgba32"/> pixels with
/// <c>SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce</c>.
/// </summary>
[Benchmark]
public void Rgba32_Avx2_Bytes()
{
    // Each channel reads its own plane so the benchmark touches three distinct
    // source buffers (previously all three aliased the red plane).
    ReadOnlySpan<byte> r = this.rBuf;
    ReadOnlySpan<byte> g = this.gBuf;
    ReadOnlySpan<byte> b = this.bBuf;
    Span<Rgba32> rgb = this.rgbaBuf;

    SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb);
}
#endif
#pragma warning disable SA1132
// Eight consecutive bytes, used to read a byte plane in groups of eight.
private struct Byte8
{
    public byte V0;
    public byte V1;
    public byte V2;
    public byte V3;
    public byte V4;
    public byte V5;
    public byte V6;
    public byte V7;
}
// Four consecutive bytes, used to read a byte plane in groups of four.
private struct Byte4
{
    public byte V0;
    public byte V1;
    public byte V2;
    public byte V3;
}
#pragma warning restore
// Results @ Anton's PC, 2020 Dec 05
// .NET Core 3.1.1
// Intel Core i7-7700HQ CPU 2.80GHz (Kaby Lake), 1 CPU, 8 logical and 4 physical cores
//
// | Method | Count | Mean | Error | StdDev | Ratio | RatioSD |
// |--------------------------------- |------ |-----------:|---------:|---------:|------:|--------:|
// | Rgb24_Scalar_PerElement_Span | 1024 | 1,634.6 ns | 26.56 ns | 24.84 ns | 3.12 | 0.05 |
// | Rgb24_Scalar_PerElement_Unsafe | 1024 | 1,284.7 ns | 4.70 ns | 4.16 ns | 2.46 | 0.01 |
// | Rgb24_Scalar_PerElement_Batched8 | 1024 | 1,182.3 ns | 5.12 ns | 4.27 ns | 2.26 | 0.01 |
// | Rgb24_Scalar_PerElement_Batched4 | 1024 | 1,146.2 ns | 16.38 ns | 14.52 ns | 2.19 | 0.02 |
// | Rgba32_Avx2_Float | 1024 | 522.7 ns | 1.78 ns | 1.39 ns | 1.00 | 0.00 |
// | Rgb24_Avx2_Bytes | 1024 | 243.3 ns | 1.56 ns | 1.30 ns | 0.47 | 0.00 |
// | Rgba32_Avx2_Bytes | 1024 | 146.0 ns | 2.48 ns | 2.32 ns | 0.28 | 0.01 |
}
}

2
tests/ImageSharp.Benchmarks/Samplers/GaussianBlur.cs

@ -7,7 +7,7 @@ using SixLabors.ImageSharp.Processing;
namespace SixLabors.ImageSharp.Benchmarks.Samplers
{
[Config(typeof(Config.ShortClr))]
[Config(typeof(Config.MultiFramework))]
public class GaussianBlur
{
[Benchmark]

159
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -5,8 +5,10 @@ using System;
using System.Linq;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Common.Tuples;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics.X86;
#endif
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
using Xunit.Abstractions;
@ -169,7 +171,7 @@ namespace SixLabors.ImageSharp.Tests.Common
public static readonly TheoryData<int> ArbitraryArraySizes =
new TheoryData<int>
{
0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 520,
0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520,
};
[Theory]
@ -336,90 +338,135 @@ namespace SixLabors.ImageSharp.Tests.Common
}
}
private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
int count,
Action<Memory<float>,
Memory<byte>> convert,
int seed = -1)
[Theory]
[MemberData(nameof(ArbitraryArraySizes))]
public void PackFromRgbPlanes_Rgb24(int count)
{
seed = seed > 0 ? seed : count;
float[] source = new Random(seed).GenerateRandomFloatArray(count, -0.2f, 1.2f);
byte[] expected = source.Select(NormalizedFloatToByte).ToArray();
var actual = new byte[count];
convert(source, actual);
Assert.Equal(expected, actual);
TestPackFromRgbPlanes<Rgb24>(
count,
(r, g, b, actual) =>
SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual));
}
private static byte NormalizedFloatToByte(float f) => (byte)Math.Min(255f, Math.Max(0f, (f * 255f) + 0.5f));
[Theory]
[InlineData(0)]
[InlineData(7)]
[InlineData(42)]
[InlineData(255)]
[InlineData(256)]
[InlineData(257)]
private void MagicConvertToByte(float value)
[MemberData(nameof(ArbitraryArraySizes))]
public void PackFromRgbPlanes_Rgba32(int count)
{
byte actual = MagicConvert(value / 256f);
var expected = (byte)value;
Assert.Equal(expected, actual);
TestPackFromRgbPlanes<Rgba32>(
count,
(r, g, b, actual) =>
SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual));
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Fact]
private void BulkConvertNormalizedFloatToByte_Step()
public void PackFromRgbPlanesAvx2Reduce_Rgb24()
{
if (this.SkipOnNonAvx2())
if (!Avx2.IsSupported)
{
return;
}
float[] source = { 0, 7, 42, 255, 0.5f, 1.1f, 2.6f, 16f };
byte[] r = Enumerable.Range(0, 32).Select(x => (byte)x).ToArray();
byte[] g = Enumerable.Range(100, 32).Select(x => (byte)x).ToArray();
byte[] b = Enumerable.Range(200, 32).Select(x => (byte)x).ToArray();
const int padding = 4;
Rgb24[] d = new Rgb24[32 + padding];
byte[] expected = source.Select(f => (byte)Math.Round(f)).ToArray();
ReadOnlySpan<byte> rr = r.AsSpan();
ReadOnlySpan<byte> gg = g.AsSpan();
ReadOnlySpan<byte> bb = b.AsSpan();
Span<Rgb24> dd = d.AsSpan();
source = source.Select(f => f / 255f).ToArray();
SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd);
Span<byte> dest = stackalloc byte[8];
this.MagicConvert(source, dest);
for (int i = 0; i < 32; i++)
{
Assert.Equal(i, d[i].R);
Assert.Equal(i + 100, d[i].G);
Assert.Equal(i + 200, d[i].B);
}
Assert.True(dest.SequenceEqual(expected));
Assert.Equal(0, rr.Length);
Assert.Equal(0, gg.Length);
Assert.Equal(0, bb.Length);
Assert.Equal(padding, dd.Length);
}
private static byte MagicConvert(float x)
[Fact]
public void PackFromRgbPlanesAvx2Reduce_Rgba32()
{
float f = 32768.0f + x;
uint i = Unsafe.As<float, uint>(ref f);
return (byte)i;
}
if (!Avx2.IsSupported)
{
return;
}
private void MagicConvert(Span<float> source, Span<byte> dest)
{
var magick = new Vector<float>(32768.0f);
byte[] r = Enumerable.Range(0, 32).Select(x => (byte)x).ToArray();
byte[] g = Enumerable.Range(100, 32).Select(x => (byte)x).ToArray();
byte[] b = Enumerable.Range(200, 32).Select(x => (byte)x).ToArray();
var scale = new Vector<float>(255f) / new Vector<float>(256f);
Rgba32[] d = new Rgba32[32];
Vector<float> x = MemoryMarshal.Cast<float, Vector<float>>(source)[0];
ReadOnlySpan<byte> rr = r.AsSpan();
ReadOnlySpan<byte> gg = g.AsSpan();
ReadOnlySpan<byte> bb = b.AsSpan();
Span<Rgba32> dd = d.AsSpan();
SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd);
for (int i = 0; i < 32; i++)
{
Assert.Equal(i, d[i].R);
Assert.Equal(i + 100, d[i].G);
Assert.Equal(i + 200, d[i].B);
Assert.Equal(255, d[i].A);
}
x = (x * scale) + magick;
Assert.Equal(0, rr.Length);
Assert.Equal(0, gg.Length);
Assert.Equal(0, bb.Length);
Assert.Equal(0, dd.Length);
}
#endif
internal static void TestPackFromRgbPlanes<TPixel>(int count, Action<byte[], byte[], byte[], TPixel[]> packMethod)
where TPixel : unmanaged, IPixel<TPixel>
{
Random rnd = new Random(42);
byte[] r = rnd.GenerateRandomByteArray(count);
byte[] g = rnd.GenerateRandomByteArray(count);
byte[] b = rnd.GenerateRandomByteArray(count);
TPixel[] expected = new TPixel[count];
for (int i = 0; i < count; i++)
{
expected[i].FromRgb24(new Rgb24(r[i], g[i], b[i]));
}
Tuple8.OfUInt32 ii = default;
TPixel[] actual = new TPixel[count + 3]; // padding for Rgb24 AVX2
packMethod(r, g, b, actual);
ref Vector<float> iiRef = ref Unsafe.As<Tuple8.OfUInt32, Vector<float>>(ref ii);
Assert.True(expected.AsSpan().SequenceEqual(actual.AsSpan().Slice(0, count)));
}
iiRef = x;
private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
int count,
Action<Memory<float>,
Memory<byte>> convert,
int seed = -1)
{
seed = seed > 0 ? seed : count;
float[] source = new Random(seed).GenerateRandomFloatArray(count, -0.2f, 1.2f);
byte[] expected = source.Select(NormalizedFloatToByte).ToArray();
var actual = new byte[count];
ref Tuple8.OfByte d = ref MemoryMarshal.Cast<byte, Tuple8.OfByte>(dest)[0];
d.LoadFrom(ref ii);
convert(source, actual);
this.Output.WriteLine(ii.ToString());
this.Output.WriteLine(d.ToString());
Assert.Equal(expected, actual);
}
private static byte NormalizedFloatToByte(float f) => (byte)Math.Min(255f, Math.Max(0f, (f * 255f) + 0.5f));
private static void AssertEvenRoundIsCorrect(Vector<float> r, Vector<float> v)
{
for (int i = 0; i < Vector<float>.Count; i++)

18
tests/ImageSharp.Tests/PixelFormats/PixelOperations/PixelOperationsTests.cs

@ -10,6 +10,7 @@ using System.Runtime.InteropServices;
using SixLabors.ImageSharp.ColorSpaces.Companding;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Tests.Common;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
using Xunit.Abstractions;
@ -1002,6 +1003,19 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelOperations
(s, d) => this.Operations.ToRgba64Bytes(this.Configuration, s, d.GetSpan(), count));
}
/// <summary>
/// Runs the shared <c>SimdUtilsTests.TestPackFromRgbPlanes</c> check against the
/// <c>PackFromRgbPlanes</c> implementation of the pixel operations for <c>TPixel</c>.
/// </summary>
[Theory]
[MemberData(nameof(ArraySizesData))]
public void PackFromRgbPlanes(int count)
    => SimdUtilsTests.TestPackFromRgbPlanes<TPixel>(
        count,
        (red, green, blue, destination) =>
            PixelOperations<TPixel>.Instance.PackFromRgbPlanes(this.Configuration, red, green, blue, destination));
public delegate void RefAction<T1>(ref T1 arg1);
internal static Vector4[] CreateExpectedVector4Data(TPixel[] source, RefAction<Vector4> vectorModifier = null)
@ -1102,10 +1116,10 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelOperations
return result;
}
internal static byte[] CreateByteTestData(int length)
internal static byte[] CreateByteTestData(int length, int seed = 42)
{
byte[] result = new byte[length];
var rnd = new Random(42); // Deterministic random values
var rnd = new Random(seed); // Deterministic random values
for (int i = 0; i < result.Length; i++)
{

33
tests/ImageSharp.Tests/Processing/Filters/BrightnessTest.cs

@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.Processing.Processors.Filters;
using Xunit;
@ -26,5 +27,33 @@ namespace SixLabors.ImageSharp.Tests.Processing.Effects
Assert.Equal(1.5F, processor.Amount);
}
/// <summary>
/// Applies a brightness factor of 2 to single-color images and checks the first pixel
/// for both a byte based (<see cref="Rgb24"/>) and a signed float based
/// (<see cref="HalfSingle"/>) pixel format.
/// </summary>
[Fact]
public void Brightness_scaled_vector()
{
    // Every image is disposed as soon as its assertion has run; previously the
    // images were never disposed and two were dropped by reassigning the variable.
    using (var rgbImage = new Image<Rgb24>(Configuration.Default, 100, 100, new Rgb24(0, 0, 0)))
    {
        rgbImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2)));
        Assert.Equal(new Rgb24(0, 0, 0), rgbImage[0, 0]);
    }

    using (var rgbImage = new Image<Rgb24>(Configuration.Default, 100, 100, new Rgb24(10, 10, 10)))
    {
        rgbImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2)));
        Assert.Equal(new Rgb24(20, 20, 20), rgbImage[0, 0]);
    }

    using (var halfSingleImage = new Image<HalfSingle>(Configuration.Default, 100, 100, new HalfSingle(-1)))
    {
        halfSingleImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2)));
        Assert.Equal(new HalfSingle(-1), halfSingleImage[0, 0]);
    }

    using (var halfSingleImage = new Image<HalfSingle>(Configuration.Default, 100, 100, new HalfSingle(-0.5f)))
    {
        halfSingleImage.Mutate(x => x.ApplyProcessor(new BrightnessProcessor(2)));
        Assert.Equal(new HalfSingle(0), halfSingleImage[0, 0]);
    }
}
}
}
}

Loading…
Cancel
Save