Browse Source

Faster Shuffle4Slice3

pull/1409/head
James Jackson-South 5 years ago
parent
commit
f462bfe7f0
  1. 88
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  2. 4
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

88
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@@ -104,7 +104,7 @@ namespace SixLabors.ImageSharp
int remainder = source.Length % (Vector128<byte>.Count * 3);
int sourceCount = source.Length - remainder;
int destCount = (int)(sourceCount * (4 / 3D));
int destCount = sourceCount * 4 / 3;
if (sourceCount > 0)
{
@@ -134,20 +134,20 @@ namespace SixLabors.ImageSharp
{
if (Ssse3.IsSupported)
{
int remainder = ImageMaths.ModuloP2(dest.Length, Vector128<byte>.Count);
int remainder = source.Length % (Vector128<byte>.Count * 4);
int adjustedCount = dest.Length - remainder;
int destSlice = (int)(adjustedCount * (3 / 4F));
int sourceCount = source.Length - remainder;
int destCount = sourceCount * 3 / 4;
if (adjustedCount > 0)
if (sourceCount > 0)
{
Shuffle4Slice3(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount),
source.Slice(0, sourceCount),
dest.Slice(0, destCount),
control);
source = source.Slice(adjustedCount);
dest = dest.Slice(destSlice);
source = source.Slice(sourceCount);
dest = dest.Slice(destCount);
}
}
}
@@ -243,7 +243,7 @@ namespace SixLabors.ImageSharp
// We can add static ROS instances if need be in the future.
Span<byte> bytes = stackalloc byte[Vector256<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector256<byte> vcm = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(bytes));
Vector256<byte> vshuffle = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector256<byte> sourceBase =
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(source));
@@ -260,17 +260,17 @@ namespace SixLabors.ImageSharp
ref Vector256<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
ref Vector256<byte> vd0 = ref Unsafe.Add(ref destBase, i);
vd0 = Avx2.Shuffle(vs0, vcm);
Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vcm);
Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vcm);
Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vcm);
vd0 = Avx2.Shuffle(vs0, vshuffle);
Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle);
Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle);
Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm);
Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle);
}
}
}
@@ -279,7 +279,7 @@ namespace SixLabors.ImageSharp
// Ssse3
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vcm = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
@@ -296,17 +296,17 @@ namespace SixLabors.ImageSharp
ref Vector128<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
ref Vector128<byte> vd0 = ref Unsafe.Add(ref destBase, i);
vd0 = Ssse3.Shuffle(vs0, vcm);
Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vcm);
Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vcm);
Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vcm);
vd0 = Ssse3.Shuffle(vs0, vshuffle);
Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle);
Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle);
Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vcm);
Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle);
}
}
}
@@ -363,26 +363,46 @@ namespace SixLabors.ImageSharp
{
if (Ssse3.IsSupported)
{
Vector128<byte> sliceMask = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, -1, -1, -1, -1).AsByte();
Vector128<byte> vmasko = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15).AsByte();
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12).AsByte();
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vcm = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
fixed (byte* sBase = source)
fixed (byte* dBase = dest)
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / Vector128<byte>.Count;
for (int i = 0, j = 0; i < n; i += 4, j += 3)
{
byte* s = sBase;
byte* d = dBase;
ref Vector128<byte> v0 = ref Unsafe.Add(ref sourceBase, i);
Vector128<byte> v1 = Unsafe.Add(ref v0, 1);
Vector128<byte> v2 = Unsafe.Add(ref v0, 2);
Vector128<byte> v3 = Unsafe.Add(ref v0, 3);
for (int i = 0; i < source.Length; i += 16)
{
Vector128<byte> vs0 = Ssse3.Shuffle(Sse2.LoadVector128(s), vcm);
Sse2.Store(d, Ssse3.Shuffle(vs0, sliceMask));
v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vshuffle), vmaske);
v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vshuffle), vmasko);
v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vshuffle), vmaske);
v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko);
s += 16;
d += 12;
}
v0 = Ssse3.AlignRight(v1, v0, 4);
v3 = Ssse3.AlignRight(v3, v2, 12);
v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4);
v2 = Sse2.ShiftRightLogical128BitLane(v2, 4);
v1 = Ssse3.AlignRight(v2, v1, 8);
ref Vector128<byte> vd = ref Unsafe.Add(ref destBase, j);
vd = v0;
Unsafe.Add(ref vd, 1) = v1;
Unsafe.Add(ref vd, 2) = v3;
}
}
}

4
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@@ -183,7 +183,7 @@ namespace SixLabors.ImageSharp
"Output span must be divisable by 4!");
DebugGuard.IsTrue(
source.Length == (int)(dest.Length * 3 / 4D),
source.Length == dest.Length * 3 / 4,
nameof(source),
"Input span must be 3/4 the length of the output span!");
}
@@ -202,7 +202,7 @@ namespace SixLabors.ImageSharp
"Output span must be divisable by 3!");
DebugGuard.IsTrue(
source.Length == (int)(dest.Length * 4 / 3F),
source.Length == dest.Length * 4 / 3,
nameof(source),
"Output span must be 3/4 the length of the input span!");
}

Loading…
Cancel
Save