Browse Source

AVX2 implementation seems to work

js/color-alpha-handling
Anton Firszov 5 years ago
parent
commit
7e33e3fbdb
  1. 106
      src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
  2. 39
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

106
src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs

@ -3,10 +3,29 @@ using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
// Byte-shuffle control used with Avx2.Shuffle on interleaved 4-byte pixels.
// The same 16 indices are listed once per 128-bit lane: the first 12 select
// bytes 0-2 of each 4-byte group (packing them contiguously), the last 4
// select byte 3 of each group (indices 3, 7, 11, 15), moving it to the lane's end.
private static ReadOnlySpan<byte> ShuffleMaskShiftAlpha
{
    get
    {
        return new byte[]
        {
            // lower lane: three leading bytes of each group, then the fourth bytes
            0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,
            3, 7, 11, 15,

            // upper lane: identical pattern
            0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,
            3, 7, 11, 15
        };
    }
}
// Control data for Avx2.PermuteVar8x32, stored as the raw bytes of eight
// 32-bit indices (least significant byte first): 0, 1, 2, 4, 5, 6, 3, 7.
// Elements 3 and 7 of the source vector are therefore moved to the two
// last positions, while the remaining six elements are packed to the front.
public static ReadOnlySpan<byte> PermuteMaskShiftAlpha8x32
{
    get
    {
        return new byte[]
        {
            0, 0, 0, 0,
            1, 0, 0, 0,
            2, 0, 0, 0,
            4, 0, 0, 0,
            5, 0, 0, 0,
            6, 0, 0, 0,
            3, 0, 0, 0,
            7, 0, 0, 0
        };
    }
}
[MethodImpl(InliningOptions.ShortMethod)]
internal static void PackFromRgbPlanes(
Configuration configuration,
@ -15,7 +34,17 @@ namespace SixLabors.ImageSharp
ReadOnlySpan<byte> blueChannel,
Span<Rgb24> destination)
{
PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
}
else
#endif
{
PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
}
PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination);
}
@ -29,6 +58,81 @@ namespace SixLabors.ImageSharp
{
}
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Interleaves whole batches of 32 red, green and blue bytes into <paramref name="destination"/>
/// using AVX2, then advances all four spans past the pixels that were processed.
/// </summary>
/// <param name="redChannel">The red plane; on return, sliced to the unprocessed remainder.</param>
/// <param name="greenChannel">The green plane; on return, sliced to the unprocessed remainder.</param>
/// <param name="blueChannel">The blue plane; on return, sliced to the unprocessed remainder.</param>
/// <param name="destination">The packed pixel buffer; on return, sliced to the unprocessed remainder.</param>
/// <remarks>
/// Every iteration finishes with a 32-byte store of which only the first 24 bytes are pixel data,
/// so it touches 8 bytes beyond the 96 bytes it produces. The number of batches is limited to what
/// <paramref name="destination"/> can absorb without writing out of bounds; batches that do not fit
/// are left in the spans so the caller can finish them with a non-vectorized path.
/// </remarks>
internal static void PackFromRgbPlanesAvx2Reduce(
    ref ReadOnlySpan<byte> redChannel,
    ref ReadOnlySpan<byte> greenChannel,
    ref ReadOnlySpan<byte> blueChannel,
    ref Span<Rgb24> destination)
{
    // 8 pixels * 3 bytes are produced by each of the 4 stores of an iteration.
    const int pixelBytesPerStore = 24;
    const int bytesPerIteration = pixelBytesPerStore * 4;

    int count = redChannel.Length / Vector256<byte>.Count;

    // The final store of an iteration is a full vector, spilling (32 - 24) bytes past the pixel data.
    // Clamp the iteration count so that this spill always stays inside 'destination'.
    int storeOverrun = Vector256<byte>.Count - pixelBytesPerStore;
    long destinationBytes = (long)destination.Length * Unsafe.SizeOf<Rgb24>();
    long writableIterations = (destinationBytes - storeOverrun) / bytesPerIteration;
    if (writableIterations < count)
    {
        count = writableIterations > 0 ? (int)writableIterations : 0;
    }

    ref Vector256<byte> rBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(redChannel));
    ref Vector256<byte> gBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(greenChannel));
    ref Vector256<byte> bBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(blueChannel));
    ref byte dBase = ref Unsafe.As<Rgb24, byte>(ref MemoryMarshal.GetReference(destination));

    // NOTE(review): PermuteMaskEvenOdd8x32 is defined outside this block; it is presumably the
    // even/odd 32-bit element permutation that pre-arranges the lanes for the per-lane unpacks below.
    ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
    Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes);

    ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32);
    Vector256<uint> control2 = Unsafe.As<byte, Vector256<uint>>(ref control2Bytes);

    // Filler for the fourth byte of every intermediate 4-byte pixel.
    Vector256<byte> a = Vector256.Create((byte)255);

    Vector256<byte> shuffleAlpha = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha));

    for (int i = 0; i < count; i++)
    {
        Vector256<byte> r0 = Unsafe.Add(ref rBase, i);
        Vector256<byte> g0 = Unsafe.Add(ref gBase, i);
        Vector256<byte> b0 = Unsafe.Add(ref bBase, i);

        r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte();
        g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte();
        b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte();

        // Interleave bytes into (r, g) and (b, 255) pairs, then interleave the pairs
        // as 16-bit units to obtain four vectors of 4-byte pixels.
        Vector256<byte> rg = Avx2.UnpackLow(r0, g0);
        Vector256<byte> b1 = Avx2.UnpackLow(b0, a);

        Vector256<byte> rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
        Vector256<byte> rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();

        rg = Avx2.UnpackHigh(r0, g0);
        b1 = Avx2.UnpackHigh(b0, a);

        Vector256<byte> rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
        Vector256<byte> rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();

        // Within each 128-bit lane, pack the three leading bytes of every pixel and
        // push the filler bytes to the end of the lane.
        rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha);
        rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha);
        rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha);
        rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha);

        // Across lanes, move the two filler elements to the end of the vector so that
        // the first 24 bytes are contiguous 3-byte pixels.
        rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte();
        rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte();
        rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte();
        rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte();

        // Stores advance by 24 bytes, so each one overwrites the 8 filler bytes of the previous store.
        ref byte d1 = ref Unsafe.Add(ref dBase, bytesPerIteration * i);
        ref byte d2 = ref Unsafe.Add(ref d1, pixelBytesPerStore);
        ref byte d3 = ref Unsafe.Add(ref d2, pixelBytesPerStore);
        ref byte d4 = ref Unsafe.Add(ref d3, pixelBytesPerStore);

        Unsafe.As<byte, Vector256<byte>>(ref d1) = rgb1;
        Unsafe.As<byte, Vector256<byte>>(ref d2) = rgb2;
        Unsafe.As<byte, Vector256<byte>>(ref d3) = rgb3;
        Unsafe.As<byte, Vector256<byte>>(ref d4) = rgb4;
    }

    // Hand the unprocessed tail back to the caller.
    int slice = count * Vector256<byte>.Count;
    redChannel = redChannel.Slice(slice);
    greenChannel = greenChannel.Slice(slice);
    blueChannel = blueChannel.Slice(slice);
    destination = destination.Slice(slice);
}
#endif
private static void PackFromRgbPlanesScalarBatchedReduce(
ref ReadOnlySpan<byte> redChannel,
ref ReadOnlySpan<byte> greenChannel,

39
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -6,6 +6,7 @@ using System.Linq;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86;
using SixLabors.ImageSharp.Common.Tuples;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Tests.TestUtilities;
@ -170,7 +171,7 @@ namespace SixLabors.ImageSharp.Tests.Common
// Array lengths used as test data; every length is listed exactly once so that
// no case is executed twice.
public static readonly TheoryData<int> ArbitraryArraySizes =
    new TheoryData<int>
    {
        0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520,
    };
[Theory]
@ -357,6 +358,42 @@ namespace SixLabors.ImageSharp.Tests.Common
SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual));
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Fact]
public void PackFromRgbPlanesAvx2Reduce_Rgb24()
{
    // The AVX2 path cannot be exercised on hardware that lacks the instruction set.
    if (!Avx2.IsSupported)
    {
        return;
    }

    const int pixelCount = 32;

    // Extra destination elements beyond the pixels that are expected to be produced.
    const int padding = 4;

    byte[] red = new byte[pixelCount];
    byte[] green = new byte[pixelCount];
    byte[] blue = new byte[pixelCount];
    for (int k = 0; k < pixelCount; k++)
    {
        red[k] = (byte)k;
        green[k] = (byte)(k + 100);
        blue[k] = (byte)(k + 200);
    }

    Rgb24[] pixels = new Rgb24[pixelCount + padding];

    ReadOnlySpan<byte> redSpan = red;
    ReadOnlySpan<byte> greenSpan = green;
    ReadOnlySpan<byte> blueSpan = blue;
    Span<Rgb24> pixelSpan = pixels;

    SimdUtils.PackFromRgbPlanesAvx2Reduce(ref redSpan, ref greenSpan, ref blueSpan, ref pixelSpan);

    // Every pixel must carry the matching value from each plane.
    for (int k = 0; k < pixelCount; k++)
    {
        Rgb24 pixel = pixels[k];
        Assert.Equal(k, pixel.R);
        Assert.Equal(k + 100, pixel.G);
        Assert.Equal(k + 200, pixel.B);
    }

    // All input was consumed; only the padding is left in the destination span.
    Assert.Equal(0, redSpan.Length);
    Assert.Equal(0, greenSpan.Length);
    Assert.Equal(0, blueSpan.Length);
    Assert.Equal(padding, pixelSpan.Length);
}
#endif
internal static void TestPackFromRgbPlanes<TPixel>(int count, Action<byte[], byte[], byte[], TPixel[]> packMethod)
where TPixel : unmanaged, IPixel<TPixel>
{

Loading…
Cancel
Save