Browse Source

started Rgba32

js/color-alpha-handling
Anton Firszov 6 years ago
parent
commit
9e0b7fc874
  1. 88
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  2. 147
      src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
  3. 5
      src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs
  4. 22
      tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs
  5. 2
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

88
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -7,6 +7,7 @@ using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
{
@ -22,6 +23,20 @@ namespace SixLabors.ImageSharp
/// <summary>
/// Gets a 16-entry byte index table. The first twelve entries select bytes
/// 0-2, 4-6, 8-10 and 12-14 (every fourth byte is skipped); the last four
/// entries are 0x80.
/// NOTE(review): presumably consumed by a byte shuffle, where an index with the
/// high bit set produces a zero byte - confirm against the call site.
/// </summary>
private static ReadOnlySpan<byte> ShuffleMaskSlice4Nx16
    => new byte[]
    {
        0, 1, 2,
        4, 5, 6,
        8, 9, 10,
        12, 13, 14,
        0x80, 0x80, 0x80, 0x80
    };
/// <summary>
/// Gets a 32-entry byte index table made of the same 16 indices listed twice
/// (once per 128-bit half of a 256-bit vector). Within each half the table
/// selects bytes 0-2, 4-6, 8-10 and 12-14 first, followed by bytes 3, 7, 11
/// and 15, i.e. every fourth byte is moved to the end of the half.
/// </summary>
private static ReadOnlySpan<byte> ShuffleMaskShiftAlpha
    => new byte[]
    {
        // lower half: three-byte groups first, then every fourth byte
        0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,
        3, 7, 11, 15,

        // upper half: identical pattern
        0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,
        3, 7, 11, 15
    };
/// <summary>
/// Gets the bytes of eight 32-bit element indices (least significant byte first):
/// 0, 1, 2, 4, 5, 6, 3, 7. Compared with the identity order, element 3 is moved
/// behind element 6, so elements 0-2 and 4-6 become adjacent.
/// </summary>
public static ReadOnlySpan<byte> PermuteMaskShiftAlpha8x32
    => new byte[]
    {
        0, 0, 0, 0,
        1, 0, 0, 0,
        2, 0, 0, 0,
        4, 0, 0, 0,
        5, 0, 0, 0,
        6, 0, 0, 0,
        3, 0, 0, 0,
        7, 0, 0, 0
    };
/// <summary>
/// Shuffle single-precision (32-bit) floating-point elements in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
@ -789,6 +804,79 @@ namespace SixLabors.ImageSharp
}
}
}
/// <summary>
/// Packs as many whole 32-byte vectors of the three channel planes as possible into
/// <paramref name="destination"/> using AVX2, then advances all four spans past the
/// processed elements so the caller can handle what is left.
/// </summary>
/// <remarks>
/// The number of iterations is derived from <paramref name="redChannel"/> only; no lengths
/// are validated here and all memory access goes through unchecked references.
/// Each iteration emits four 32-byte stores spaced 24 bytes apart, so the last store of the
/// final iteration reaches 8 bytes past the packed pixels. The destination therefore needs
/// spare room behind the processed pixels.
/// </remarks>
/// <param name="redChannel">The red plane; sliced past the processed bytes on return.</param>
/// <param name="greenChannel">The green plane; sliced past the processed bytes on return.</param>
/// <param name="blueChannel">The blue plane; sliced past the processed bytes on return.</param>
/// <param name="destination">The destination pixels; sliced past the processed pixels on return.</param>
internal static void PackFromRgbPlanesAvx2Reduce(
    ref ReadOnlySpan<byte> redChannel,
    ref ReadOnlySpan<byte> greenChannel,
    ref ReadOnlySpan<byte> blueChannel,
    ref Span<Rgb24> destination)
{
    // Distance in bytes between two consecutive stores (8 packed 3-byte pixels).
    const int PackedBytesPerStore = 24;

    ref Vector256<byte> redVectors = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(redChannel));
    ref Vector256<byte> greenVectors = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(greenChannel));
    ref Vector256<byte> blueVectors = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(blueChannel));
    ref byte destinationBytes = ref Unsafe.As<Rgb24, byte>(ref MemoryMarshal.GetReference(destination));

    int vectorCount = redChannel.Length / Vector256<byte>.Count;

    // NOTE(review): PermuteMaskEvenOdd8x32 is declared outside this view; presumably it
    // pre-arranges the input so the lane-wise unpacks below yield pixels in order - confirm.
    Vector256<uint> evenOddPermute = Unsafe.As<byte, Vector256<uint>>(
        ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32));
    Vector256<uint> shiftAlphaPermute = Unsafe.As<byte, Vector256<uint>>(
        ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32));
    Vector256<byte> shiftAlphaShuffle = Unsafe.As<byte, Vector256<byte>>(
        ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha));

    // Fourth byte interleaved behind every R, G, B triple; it is shuffled out again below.
    Vector256<byte> filler = Vector256.Create((byte)255);

    for (int n = 0; n < vectorCount; n++)
    {
        Vector256<byte> red = Avx2.PermuteVar8x32(Unsafe.Add(ref redVectors, n).AsUInt32(), evenOddPermute).AsByte();
        Vector256<byte> green = Avx2.PermuteVar8x32(Unsafe.Add(ref greenVectors, n).AsUInt32(), evenOddPermute).AsByte();
        Vector256<byte> blue = Avx2.PermuteVar8x32(Unsafe.Add(ref blueVectors, n).AsUInt32(), evenOddPermute).AsByte();

        // Byte-wise interleave: (r, g) pairs and (b, 255) pairs.
        Vector256<ushort> redGreenLow = Avx2.UnpackLow(red, green).AsUInt16();
        Vector256<ushort> blueFillLow = Avx2.UnpackLow(blue, filler).AsUInt16();
        Vector256<ushort> redGreenHigh = Avx2.UnpackHigh(red, green).AsUInt16();
        Vector256<ushort> blueFillHigh = Avx2.UnpackHigh(blue, filler).AsUInt16();

        // 16-bit interleave: (r, g, b, 255) groups, 8 groups per vector.
        Vector256<byte> packed0 = Avx2.UnpackLow(redGreenLow, blueFillLow).AsByte();
        Vector256<byte> packed1 = Avx2.UnpackHigh(redGreenLow, blueFillLow).AsByte();
        Vector256<byte> packed2 = Avx2.UnpackLow(redGreenHigh, blueFillHigh).AsByte();
        Vector256<byte> packed3 = Avx2.UnpackHigh(redGreenHigh, blueFillHigh).AsByte();

        // Move the filler bytes behind the colour bytes: first within each 128-bit half
        // (byte shuffle), then across the halves (32-bit permute).
        packed0 = Avx2.PermuteVar8x32(Avx2.Shuffle(packed0, shiftAlphaShuffle).AsUInt32(), shiftAlphaPermute).AsByte();
        packed1 = Avx2.PermuteVar8x32(Avx2.Shuffle(packed1, shiftAlphaShuffle).AsUInt32(), shiftAlphaPermute).AsByte();
        packed2 = Avx2.PermuteVar8x32(Avx2.Shuffle(packed2, shiftAlphaShuffle).AsUInt32(), shiftAlphaPermute).AsByte();
        packed3 = Avx2.PermuteVar8x32(Avx2.Shuffle(packed3, shiftAlphaShuffle).AsUInt32(), shiftAlphaPermute).AsByte();

        // Each store writes 32 bytes but advances only 24, so the stores must stay in
        // ascending order: every store overwrites the trailing 8 bytes of the previous one.
        ref byte target = ref Unsafe.Add(ref destinationBytes, PackedBytesPerStore * 4 * n);
        Unsafe.As<byte, Vector256<byte>>(ref target) = packed0;
        Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref target, PackedBytesPerStore)) = packed1;
        Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref target, PackedBytesPerStore * 2)) = packed2;
        Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref target, PackedBytesPerStore * 3)) = packed3;
    }

    // Hand the unprocessed tail back to the caller.
    int processed = vectorCount * Vector256<byte>.Count;
    redChannel = redChannel.Slice(processed);
    greenChannel = greenChannel.Slice(processed);
    blueChannel = blueChannel.Slice(processed);
    destination = destination.Slice(processed);
}
}
}
}

147
src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs

@ -12,20 +12,6 @@ namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
/// <summary>
/// Gets a 32-entry byte index table made of the same 16 indices listed twice
/// (once per 128-bit half of a 256-bit vector). Within each half the table
/// selects bytes 0-2, 4-6, 8-10 and 12-14 first, followed by bytes 3, 7, 11
/// and 15, i.e. every fourth byte is moved to the end of the half.
/// </summary>
private static ReadOnlySpan<byte> ShuffleMaskShiftAlpha
    => new byte[]
    {
        // lower half: three-byte groups first, then every fourth byte
        0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,
        3, 7, 11, 15,

        // upper half: identical pattern
        0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,
        3, 7, 11, 15
    };
/// <summary>
/// Gets the bytes of eight 32-bit element indices (least significant byte first):
/// 0, 1, 2, 4, 5, 6, 3, 7. Compared with the identity order, element 3 is moved
/// behind element 6, so elements 0-2 and 4-6 become adjacent.
/// </summary>
public static ReadOnlySpan<byte> PermuteMaskShiftAlpha8x32
    => new byte[]
    {
        0, 0, 0, 0,
        1, 0, 0, 0,
        2, 0, 0, 0,
        4, 0, 0, 0,
        5, 0, 0, 0,
        6, 0, 0, 0,
        3, 0, 0, 0,
        7, 0, 0, 0
    };
[MethodImpl(InliningOptions.ShortMethod)]
internal static void PackFromRgbPlanes(
Configuration configuration,
@ -34,10 +20,17 @@ namespace SixLabors.ImageSharp
ReadOnlySpan<byte> blueChannel,
Span<Rgb24> destination)
{
int count = redChannel.Length;
DebugGuard.IsTrue(greenChannel.Length == count, "Channels must be of same size!");
DebugGuard.IsTrue(blueChannel.Length == count, "Channels must be of same size!");
// To avoid overflows, this check is not debug-only:
Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!");
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
}
else
#endif
@ -56,101 +49,76 @@ namespace SixLabors.ImageSharp
ReadOnlySpan<byte> blueChannel,
Span<Rgba32> destination)
{
PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination);
}
#if SUPPORTS_RUNTIME_INTRINSICS
internal static void PackFromRgbPlanesAvx2Reduce(
private static void PackFromRgbPlanesScalarBatchedReduce(
ref ReadOnlySpan<byte> redChannel,
ref ReadOnlySpan<byte> greenChannel,
ref ReadOnlySpan<byte> blueChannel,
ref Span<Rgb24> destination)
{
ref Vector256<byte> rBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(redChannel));
ref Vector256<byte> gBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(greenChannel));
ref Vector256<byte> bBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(blueChannel));
ref byte dBase = ref Unsafe.As<Rgb24, byte>(ref MemoryMarshal.GetReference(destination));
int count = redChannel.Length / Vector256<byte>.Count;
ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes);
ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32);
Vector256<uint> control2 = Unsafe.As<byte, Vector256<uint>>(ref control2Bytes);
Vector256<byte> a = Vector256.Create((byte)255);
Vector256<byte> shuffleAlpha = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha));
ref ByteTuple4 r = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(redChannel));
ref ByteTuple4 g = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(greenChannel));
ref ByteTuple4 b = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(blueChannel));
ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination);
int count = destination.Length / 4;
for (int i = 0; i < count; i++)
{
Vector256<byte> r0 = Unsafe.Add(ref rBase, i);
Vector256<byte> g0 = Unsafe.Add(ref gBase, i);
Vector256<byte> b0 = Unsafe.Add(ref bBase, i);
r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte();
g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte();
b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte();
Vector256<byte> rg = Avx2.UnpackLow(r0, g0);
Vector256<byte> b1 = Avx2.UnpackLow(b0, a);
Vector256<byte> rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
Vector256<byte> rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
rg = Avx2.UnpackHigh(r0, g0);
b1 = Avx2.UnpackHigh(b0, a);
ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4);
ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1);
ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2);
ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3);
Vector256<byte> rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
Vector256<byte> rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
ref ByteTuple4 rr = ref Unsafe.Add(ref r, i);
ref ByteTuple4 gg = ref Unsafe.Add(ref g, i);
ref ByteTuple4 bb = ref Unsafe.Add(ref b, i);
rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha);
rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha);
rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha);
rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha);
d0.R = rr.V0;
d0.G = gg.V0;
d0.B = bb.V0;
rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte();
rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte();
rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte();
rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte();
d1.R = rr.V1;
d1.G = gg.V1;
d1.B = bb.V1;
ref byte d1 = ref Unsafe.Add(ref dBase, 24 * 4 * i);
ref byte d2 = ref Unsafe.Add(ref d1, 24);
ref byte d3 = ref Unsafe.Add(ref d2, 24);
ref byte d4 = ref Unsafe.Add(ref d3, 24);
d2.R = rr.V2;
d2.G = gg.V2;
d2.B = bb.V2;
Unsafe.As<byte, Vector256<byte>>(ref d1) = rgb1;
Unsafe.As<byte, Vector256<byte>>(ref d2) = rgb2;
Unsafe.As<byte, Vector256<byte>>(ref d3) = rgb3;
Unsafe.As<byte, Vector256<byte>>(ref d4) = rgb4;
d3.R = rr.V3;
d3.G = gg.V3;
d3.B = bb.V3;
}
int slice = count * Vector256<byte>.Count;
redChannel = redChannel.Slice(slice);
greenChannel = greenChannel.Slice(slice);
blueChannel = blueChannel.Slice(slice);
destination = destination.Slice(slice);
int finished = count * 4;
redChannel = redChannel.Slice(finished);
greenChannel = greenChannel.Slice(finished);
blueChannel = blueChannel.Slice(finished);
destination = destination.Slice(finished);
}
#endif
private static void PackFromRgbPlanesScalarBatchedReduce(
ref ReadOnlySpan<byte> redChannel,
ref ReadOnlySpan<byte> greenChannel,
ref ReadOnlySpan<byte> blueChannel,
ref Span<Rgb24> destination)
ref Span<Rgba32> destination)
{
ref ByteTuple4 r = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(redChannel));
ref ByteTuple4 g = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(greenChannel));
ref ByteTuple4 b = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(blueChannel));
ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination);
ref Rgba32 rgb = ref MemoryMarshal.GetReference(destination);
int count = destination.Length / 4;
destination.Fill(new Rgba32(0, 0, 0, 255));
for (int i = 0; i < count; i++)
{
ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4);
ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1);
ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2);
ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3);
ref Rgba32 d0 = ref Unsafe.Add(ref rgb, i * 4);
ref Rgba32 d1 = ref Unsafe.Add(ref d0, 1);
ref Rgba32 d2 = ref Unsafe.Add(ref d0, 2);
ref Rgba32 d3 = ref Unsafe.Add(ref d0, 3);
ref ByteTuple4 rr = ref Unsafe.Add(ref r, i);
ref ByteTuple4 gg = ref Unsafe.Add(ref g, i);
@ -199,5 +167,26 @@ namespace SixLabors.ImageSharp
d.B = Unsafe.Add(ref b, i);
}
}
/// <summary>
/// Packs the channel planes into <paramref name="destination"/> one pixel at a time,
/// writing an alpha value of 255 for every pixel.
/// </summary>
/// <remarks>
/// All element access uses unchecked references, so the loop is limited to the shortest
/// of the four spans. Destination elements beyond that length are left untouched.
/// </remarks>
/// <param name="redChannel">The red values.</param>
/// <param name="greenChannel">The green values.</param>
/// <param name="blueChannel">The blue values.</param>
/// <param name="destination">The destination pixels.</param>
private static void PackFromRgbPlanesRemainder(
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgba32> destination)
{
    // Unsafe.Add performs no bounds checks: iterating up to destination.Length alone
    // would read past the end of any channel that is shorter than the destination.
    int length = Math.Min(
        Math.Min(redChannel.Length, greenChannel.Length),
        Math.Min(blueChannel.Length, destination.Length));

    ref byte r = ref MemoryMarshal.GetReference(redChannel);
    ref byte g = ref MemoryMarshal.GetReference(greenChannel);
    ref byte b = ref MemoryMarshal.GetReference(blueChannel);
    ref Rgba32 rgba = ref MemoryMarshal.GetReference(destination);

    for (int i = 0; i < length; i++)
    {
        ref Rgba32 d = ref Unsafe.Add(ref rgba, i);
        d.R = Unsafe.Add(ref r, i);
        d.G = Unsafe.Add(ref g, i);
        d.B = Unsafe.Add(ref b, i);
        d.A = 255;
    }
}
}
}

5
src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs

@ -161,14 +161,15 @@ namespace SixLabors.ImageSharp.PixelFormats
}
/// <summary>
/// Bulk operation that converts 3 separate RGB channels to <paramref name="destination"/>
/// Bulk operation that packs 3 separate RGB channels to <paramref name="destination"/>.
/// The destination must have a padding of 3.
/// </summary>
/// <param name="configuration">A <see cref="Configuration"/> to configure internal operations.</param>
/// <param name="redChannel">A <see cref="ReadOnlySpan{T}"/> to the red values.</param>
/// <param name="greenChannel">A <see cref="ReadOnlySpan{T}"/> to the green values.</param>
/// <param name="blueChannel">A <see cref="ReadOnlySpan{T}"/> to the blue values.</param>
/// <param name="destination">A <see cref="Span{T}"/> to the destination pixels.</param>
public virtual void PackFromRgbPlanes(
internal virtual void PackFromRgbPlanes(
Configuration configuration,
ReadOnlySpan<byte> redChannel,
ReadOnlySpan<byte> greenChannel,

22
tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs

@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion
private float[] rgbaFloat;
[Params(512)]
[Params(1024)]
public int Count { get; set; }
[GlobalSetup]
@ -36,7 +36,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion
this.rBuf = new byte[this.Count];
this.gBuf = new byte[this.Count];
this.bBuf = new byte[this.Count];
this.rgbBuf = new Rgb24[this.Count];
this.rgbBuf = new Rgb24[this.Count + 3]; // padded
this.rgbaBuf = new Rgba32[this.Count];
this.rFloat = new float[this.Count];
@ -46,7 +46,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion
this.rgbaFloat = new float[this.Count * 4];
}
// [Benchmark(Baseline = true)]
// [Benchmark]
public void Rgb24_Scalar_PerElement_Pinned()
{
fixed (byte* r = &this.rBuf[0])
@ -72,7 +72,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion
Span<byte> b = this.rBuf;
Span<Rgb24> rgb = this.rgbBuf;
for (int i = 0; i < rgb.Length; i++)
for (int i = 0; i < r.Length; i++)
{
ref Rgb24 d = ref rgb[i];
d.R = r[i];
@ -81,7 +81,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion
}
}
[Benchmark(Baseline = true)]
[Benchmark]
public void Rgb24_Scalar_PerElement_Unsafe()
{
ref byte r = ref this.rBuf[0];
@ -195,7 +195,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Benchmark]
[Benchmark(Baseline = true)]
public void Rgba32_Vector_Float()
{
ref Vector256<float> rBase = ref Unsafe.As<float, Vector256<float>>(ref this.rFloat[0]);
@ -235,6 +235,16 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion
Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
}
}
/// <summary>
/// Measures <c>SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce</c> on the byte planes.
/// Despite the method name, the destination buffer passed here holds <c>Rgb24</c> pixels.
/// </summary>
[Benchmark]
public void Rgba32_Vector_Bytes()
{
    // Each channel reads its own buffer so the benchmark touches three distinct
    // input planes instead of reading the red buffer three times.
    ReadOnlySpan<byte> r = this.rBuf;
    ReadOnlySpan<byte> g = this.gBuf;
    ReadOnlySpan<byte> b = this.bBuf;
    Span<Rgb24> rgb = this.rgbBuf;

    // The spans are passed by reference because the callee slices them; the
    // sliced locals are simply discarded here.
    SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb);
}
#endif
#pragma warning disable SA1132

2
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -378,7 +378,7 @@ namespace SixLabors.ImageSharp.Tests.Common
ReadOnlySpan<byte> bb = b.AsSpan();
Span<Rgb24> dd = d.AsSpan();
SimdUtils.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd);
SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd);
for (int i = 0; i < 32; i++)
{

Loading…
Cancel
Save