Browse Source

Shuffle3 + Tests

js/color-alpha-handling
James Jackson-South 6 years ago
parent
commit
b010a15012
  1. 5
      src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs
  2. 5
      src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
  3. 90
      src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
  4. 12
      src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
  5. 102
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  6. 48
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
  7. 76
      tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

5
src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs

@ -40,6 +40,11 @@ namespace SixLabors.ImageSharp
public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
{
Guard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
Guard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
Guard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
Guard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));
this.p3 = p3;
this.p2 = p2;
this.p1 = p1;

5
src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs

@ -21,6 +21,11 @@ namespace SixLabors.ImageSharp
public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0)
{
Guard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
Guard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
Guard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
Guard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));
this.p3 = p3;
this.p2 = p2;
this.p1 = p1;

90
src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs

@ -0,0 +1,90 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
/// <inheritdoc/>
/// <remarks>
/// Declares no members of its own. It narrows <see cref="IComponentShuffle"/> so that generic
/// callers can constrain a type parameter to shuffles that operate on 3-byte groups.
/// </remarks>
internal interface IShuffle3 : IComponentShuffle
{
}
/// <summary>
/// A three-component shuffle whose component ordering is chosen at construction time.
/// </summary>
internal readonly struct DefaultShuffle3 : IShuffle3
{
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle3"/> struct.
    /// </summary>
    /// <param name="p2">Index (0-2) of the source component copied to the third destination component.</param>
    /// <param name="p1">Index (0-2) of the source component copied to the second destination component.</param>
    /// <param name="p0">Index (0-2) of the source component copied to the first destination component.</param>
    public DefaultShuffle3(byte p2, byte p1, byte p0)
    {
        // Validate in declaration order so the first offending argument is the one reported.
        Guard.MustBeBetweenOrEqualTo<byte>(p2, 0, 2, nameof(p2));
        Guard.MustBeBetweenOrEqualTo<byte>(p1, 0, 2, nameof(p1));
        Guard.MustBeBetweenOrEqualTo<byte>(p0, 0, 2, nameof(p0));

        this.p0 = p0;
        this.p1 = p1;
        this.p2 = p2;

        // The control byte always carries 3 in its top slot, followed by the three indices.
        this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0);
    }

    /// <summary>
    /// Gets the packed control byte built from the indices given to the constructor.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Shuffles <paramref name="source"/> into <paramref name="dest"/> one 3-byte group at a time
    /// without using hardware intrinsics.
    /// </summary>
    /// <param name="source">The source span of bytes; walked in steps of three.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sourceStart = ref MemoryMarshal.GetReference(source);
        ref byte destStart = ref MemoryMarshal.GetReference(dest);

        // Widen the indices once, outside the loop.
        int first = this.p0;
        int second = this.p1;
        int third = this.p2;
        int length = source.Length;

        int offset = 0;
        while (offset < length)
        {
            ref byte triplet = ref Unsafe.Add(ref sourceStart, offset);
            ref byte target = ref Unsafe.Add(ref destStart, offset);

            target = Unsafe.Add(ref triplet, first);
            Unsafe.Add(ref target, 1) = Unsafe.Add(ref triplet, second);
            Unsafe.Add(ref target, 2) = Unsafe.Add(ref triplet, third);

            offset += 3;
        }
    }
}
/// <summary>
/// A fixed three-component shuffle that reverses each 3-byte group (XYZ becomes ZYX).
/// </summary>
internal readonly struct ZYXShuffle3 : IShuffle3
{
    private static readonly byte ZYX = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);

    /// <summary>
    /// Gets the packed control byte describing the ZYX ordering.
    /// </summary>
    public byte Control => ZYX;

    /// <summary>
    /// Reverses every 3-byte group of <paramref name="source"/> into <paramref name="dest"/>
    /// without using hardware intrinsics.
    /// </summary>
    /// <param name="source">The source span of bytes. Only whole 3-byte groups are processed.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        int n = source.Length / 3;
        if (n == 0)
        {
            return;
        }

        ref Byte3 sBase = ref Unsafe.As<byte, Byte3>(ref MemoryMarshal.GetReference(source));
        ref Byte3 dBase = ref Unsafe.As<byte, Byte3>(ref MemoryMarshal.GetReference(dest));

        // The packed path loads 4 bytes for every 3-byte group, i.e. it also touches the first
        // byte of the following group. That is only safe while a following group exists, so the
        // final group is excluded here and handled byte-wise below.
        int last = n - 1;
        for (int i = 0; i < last; i++)
        {
            uint packed = Unsafe.As<Byte3, uint>(ref Unsafe.Add(ref sBase, i));

            // NOTE: the byte positions below assume a little-endian load.
            // packed = [W Z Y X]
            // tmp1 = [W 0 Y 0]
            // tmp2 = [0 Z 0 X]
            // tmp3=ROTL(16, tmp2) = [0 X 0 Z]
            // tmp1 + tmp3 = [W X Y Z]
            uint tmp1 = packed & 0xFF00FF00;
            uint tmp2 = packed & 0x00FF00FF;
            uint tmp3 = (tmp2 << 16) | (tmp2 >> 16);
            packed = tmp1 + tmp3;

            // Only the low three bytes are stored; the stray W byte is dropped.
            Unsafe.Add(ref dBase, i) = Unsafe.As<uint, Byte3>(ref packed);
        }

        // Final group: read exactly three bytes so nothing past the end of source is accessed.
        // All three are read before any is written, matching the load-then-store order above.
        ref byte sLast = ref Unsafe.As<Byte3, byte>(ref Unsafe.Add(ref sBase, last));
        ref byte dLast = ref Unsafe.As<Byte3, byte>(ref Unsafe.Add(ref dBase, last));

        byte x = sLast;
        byte y = Unsafe.Add(ref sLast, 1);
        byte z = Unsafe.Add(ref sLast, 2);

        dLast = z;
        Unsafe.Add(ref dLast, 1) = y;
        Unsafe.Add(ref dLast, 2) = x;
    }
}
/// <summary>
/// A member-less value type with an explicit size of three bytes. It carries no data accessors
/// and is used as a reinterpretation target so byte data can be addressed and copied
/// three bytes at a time.
/// </summary>
[StructLayout(LayoutKind.Explicit, Size = 3)]
internal readonly struct Byte3
{
}
}

12
src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs

@ -20,6 +20,11 @@ namespace SixLabors.ImageSharp
public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0)
{
Guard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
Guard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
Guard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
Guard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));
this.p2 = p2;
this.p1 = p1;
this.p0 = p0;
@ -62,13 +67,8 @@ namespace SixLabors.ImageSharp
int n = source.Length / 4;
for (int i = 0, j = 0; i < n; i++, j += 3)
{
Unsafe.As<byte, Xyz24>(ref Unsafe.Add(ref dBase, j)) = Unsafe.As<uint, Xyz24>(ref Unsafe.Add(ref sBase, i));
Unsafe.As<byte, Byte3>(ref Unsafe.Add(ref dBase, j)) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref sBase, i));
}
}
}
[StructLayout(LayoutKind.Explicit, Size = 3)]
internal readonly struct Xyz24
{
}
}

102
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -86,6 +86,38 @@ namespace SixLabors.ImageSharp
}
}
/// <summary>
/// Shuffles as many 8-bit integer triplets from <paramref name="source"/> into
/// <paramref name="dest"/> as can be handled in whole blocks of three 128-bit vectors,
/// then advances both spans past the processed bytes so the caller can finish the remainder.
/// Does nothing when SSSE3 is not supported.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle3Reduce(
    ref ReadOnlySpan<byte> source,
    ref Span<byte> dest,
    byte control)
{
    if (!Ssse3.IsSupported)
    {
        return;
    }

    // One vectorized step consumes three full vectors worth of bytes.
    int blockLength = Vector128<byte>.Count * 3;
    int vectorizable = source.Length - (source.Length % blockLength);
    if (vectorizable <= 0)
    {
        return;
    }

    ReadOnlySpan<byte> head = source.Slice(0, vectorizable);
    Span<byte> headDest = dest.Slice(0, vectorizable);
    Shuffle3(head, headDest, control);

    // Leave only the unprocessed tail visible to the caller.
    source = source.Slice(vectorizable);
    dest = dest.Slice(vectorizable);
}
/// <summary>
/// Pads then shuffles 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
@ -94,7 +126,7 @@ namespace SixLabors.ImageSharp
/// <param name="dest">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static unsafe void Pad3Shuffle4Reduce(
public static void Pad3Shuffle4Reduce(
ref ReadOnlySpan<byte> source,
ref Span<byte> dest,
byte control)
@ -127,7 +159,7 @@ namespace SixLabors.ImageSharp
/// <param name="dest">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static unsafe void Shuffle4Slice3Reduce(
public static void Shuffle4Slice3Reduce(
ref ReadOnlySpan<byte> source,
ref Span<byte> dest,
byte control)
@ -313,7 +345,69 @@ namespace SixLabors.ImageSharp
}
[MethodImpl(InliningOptions.ShortMethod)]
private static unsafe void Pad3Shuffle4(
private static void Shuffle3(
    ReadOnlySpan<byte> source,
    Span<byte> dest,
    byte control)
{
    // Shuffles 3-byte groups, 48 bytes (16 triplets) per iteration. Each block of three vectors
    // is split into four vectors of four triplets, every triplet is padded to four bytes,
    // shuffled, compacted back to three bytes and the four results are re-joined into three
    // output vectors. The caller is expected to pass a length that is a multiple of 48 bytes.
    if (Ssse3.IsSupported)
    {
        // Expands 4 packed triplets (12 bytes) to 4 x 4 bytes; 0x80 zeroes the padding byte.
        Vector128<byte> vmask = Vector128.Create(0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80).AsByte();

        // Sets every padding byte to 0xFF.
        Vector128<byte> vfill = Vector128.Create(0xff000000ff000000ul).AsByte();

        // Compacts 4 x 4 bytes back to 12 data bytes: vmasko puts the data in the low 12 bytes,
        // vmaske (vmasko rotated by 12) puts the data in the high 12 bytes.
        Vector128<byte> vmasko = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15).AsByte();
        Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12).AsByte();

        // Presumably expands the control byte into a per-byte shuffle mask; MmShuffleSpan is
        // defined elsewhere.
        Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
        Shuffle.MmShuffleSpan(ref bytes, control);
        Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));

        ref Vector128<byte> sourceBase =
            ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));

        ref Vector128<byte> destBase =
            ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));

        int n = source.Length / Vector128<byte>.Count;

        for (int i = 0; i < n; i += 3)
        {
            ref Vector128<byte> vs = ref Unsafe.Add(ref sourceBase, i);

            // Load all three vectors by value. v0 must NOT be a ref into the source buffer:
            // it is reassigned below and would otherwise overwrite the caller's read-only data.
            Vector128<byte> v0 = vs;
            Vector128<byte> v1 = Unsafe.Add(ref vs, 1);
            Vector128<byte> v2 = Unsafe.Add(ref vs, 2);

            // Split 48 bytes so each of v0..v3 holds four triplets in its low 12 bytes.
            Vector128<byte> v3 = Sse2.ShiftRightLogical128BitLane(v2, 4);
            v2 = Ssse3.AlignRight(v2, v1, 8);
            v1 = Ssse3.AlignRight(v1, v0, 12);

            // Pad each triplet to 4 bytes, fill the padding, then apply the requested shuffle.
            v0 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle);
            v1 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle);
            v2 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle);
            v3 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle);

            // Drop the padding again: data ends up high in v0/v2 and low in v1/v3.
            v0 = Ssse3.Shuffle(v0, vmaske);
            v1 = Ssse3.Shuffle(v1, vmasko);
            v2 = Ssse3.Shuffle(v2, vmaske);
            v3 = Ssse3.Shuffle(v3, vmasko);

            // Stitch the four 12-byte results back into three contiguous 16-byte vectors.
            v0 = Ssse3.AlignRight(v1, v0, 4);
            v3 = Ssse3.AlignRight(v3, v2, 12);

            v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4);
            v2 = Sse2.ShiftRightLogical128BitLane(v2, 4);
            v1 = Ssse3.AlignRight(v2, v1, 8);

            ref Vector128<byte> vd = ref Unsafe.Add(ref destBase, i);

            vd = v0;
            Unsafe.Add(ref vd, 1) = v1;
            Unsafe.Add(ref vd, 2) = v3;
        }
    }
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void Pad3Shuffle4(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
@ -356,7 +450,7 @@ namespace SixLabors.ImageSharp
}
[MethodImpl(InliningOptions.ShortMethod)]
private static unsafe void Shuffle4Slice3(
private static void Shuffle4Slice3(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)

48
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@ -23,7 +23,7 @@ namespace SixLabors.ImageSharp
Span<float> dest,
byte control)
{
VerifyShuffleSpanInput(source, dest);
VerifyShuffle4SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control);
@ -50,7 +50,7 @@ namespace SixLabors.ImageSharp
TShuffle shuffle)
where TShuffle : struct, IComponentShuffle
{
VerifyShuffleSpanInput(source, dest);
VerifyShuffle4SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control);
@ -63,6 +63,33 @@ namespace SixLabors.ImageSharp
}
}
/// <summary>
/// Reorders the bytes of every 8-bit integer triplet in <paramref name="source"/> as described
/// by <paramref name="shuffle"/> and stores the results in <paramref name="dest"/>.
/// </summary>
/// <typeparam name="TShuffle">The type of shuffle to perform.</typeparam>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The shuffle supplying both the control byte and the scalar fallback.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle3<TShuffle>(
    ReadOnlySpan<byte> source,
    Span<byte> dest,
    TShuffle shuffle)
    where TShuffle : struct, IShuffle3
{
    VerifyShuffle3SpanInput(source, dest);

#if SUPPORTS_RUNTIME_INTRINSICS
    // Both spans are passed by reference and come back trimmed to whatever was not vectorized.
    HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control);
#endif

    if (source.IsEmpty)
    {
        return;
    }

    // Whatever is left is handled by the shuffle's own scalar implementation.
    shuffle.RunFallbackShuffle(source, dest);
}
/// <summary>
/// Pads then shuffles 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
@ -136,7 +163,7 @@ namespace SixLabors.ImageSharp
}
[Conditional("DEBUG")]
private static void VerifyShuffleSpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
private static void VerifyShuffle4SpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
where T : struct
{
DebugGuard.IsTrue(
@ -150,6 +177,21 @@ namespace SixLabors.ImageSharp
"Input spans must be divisable by 4!");
}
// Debug-only precondition checks for the three-component shuffle entry point:
// both spans must have equal length and that length must be a whole number of triplets.
[Conditional("DEBUG")]
private static void VerifyShuffle3SpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
    where T : struct
{
    int sourceLength = source.Length;

    bool lengthsMatch = sourceLength == dest.Length;
    DebugGuard.IsTrue(lengthsMatch, nameof(source), "Input spans must be of same length!");

    bool wholeTriplets = (sourceLength % 3) == 0;
    DebugGuard.IsTrue(wholeTriplets, nameof(source), "Input spans must be divisable by 3!");
}
[Conditional("DEBUG")]
private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan<byte> source, Span<byte> dest)
{

76
tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

@ -92,6 +92,48 @@ namespace SixLabors.ImageSharp.Tests.Common
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
}
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy3))]
public void BulkShuffleByte3Channel(int count)
{
    static void RunTest(string serialized)
    {
        int size = FeatureTestRunner.Deserialize<int>(serialized);

        // The individual shuffles are exercised inline rather than through a theory:
        // RemoteExecutor cannot be used within generic methods, and an IShuffle3
        // cannot be passed to the generic utils method.

        // Fixed reversing shuffle.
        ZYXShuffle3 reversed = default;
        TestShuffleByte3Channel(
            size,
            (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, reversed),
            reversed.Control);

        // Configurable shuffles, in order: XYZ, YYY, ZZZ.
        DefaultShuffle3[] configurable =
        {
            new DefaultShuffle3(2, 1, 0),
            new DefaultShuffle3(1, 1, 1),
            new DefaultShuffle3(2, 2, 2)
        };

        foreach (DefaultShuffle3 shuffle in configurable)
        {
            TestShuffleByte3Channel(
                size,
                (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, shuffle),
                shuffle.Control);
        }
    }

    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        count,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
}
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy3))]
public void BulkPad3Shuffle4Channel(int count)
@ -102,7 +144,7 @@ namespace SixLabors.ImageSharp.Tests.Common
// These cannot be expressed as a theory as you cannot
// use RemoteExecutor within generic methods nor pass
// IComponentShuffle to the generic utils method.
// IPad3Shuffle4 to the generic utils method.
XYZWPad3Shuffle4 xyzw = default;
TestPad3Shuffle4Channel(
size,
@ -144,7 +186,7 @@ namespace SixLabors.ImageSharp.Tests.Common
// These cannot be expressed as a theory as you cannot
// use RemoteExecutor within generic methods nor pass
// IComponentShuffle to the generic utils method.
// IShuffle4Slice3 to the generic utils method.
XYZWShuffle4Slice3 xyzw = default;
TestShuffle4Slice3Channel(
size,
@ -237,6 +279,36 @@ namespace SixLabors.ImageSharp.Tests.Common
Assert.Equal(expected, result);
}
// Runs the supplied shuffle over seeded random data and compares the output with a
// straightforward per-triplet reference built from the indices decoded from the control byte.
private static void TestShuffleByte3Channel(
    int count,
    Action<Memory<byte>, Memory<byte>> convert,
    byte control)
{
    // Seeding with the length keeps the input reproducible for a given size.
    var input = new byte[count];
    new Random(count).NextBytes(input);

    // The top slot of the control byte is not used for three-component data.
    SimdUtils.Shuffle.InverseMmShuffle(control, out int _, out int p2, out int p1, out int p0);

    var expected = new byte[count];
    int offset = 0;
    while (offset < expected.Length)
    {
        expected[offset] = input[p0 + offset];
        expected[offset + 1] = input[p1 + offset];
        expected[offset + 2] = input[p2 + offset];
        offset += 3;
    }

    var actual = new byte[count];
    convert(input, actual);

    Assert.Equal(expected, actual);
}
private static void TestPad3Shuffle4Channel(
int count,
Action<Memory<byte>, Memory<byte>> convert,

Loading…
Cancel
Save