Browse Source

Unroll loops

js/color-alpha-handling
James Jackson-South 6 years ago
parent
commit
956d1a3c77
  1. 105
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  2. 2
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

105
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -106,34 +106,72 @@ namespace SixLabors.ImageSharp
{ {
if (Avx.IsSupported) if (Avx.IsSupported)
{ {
int n = dest.Length / Vector256<float>.Count;
ref Vector256<float> sourceBase = ref Vector256<float> sourceBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source)); ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
ref Vector256<float> destBase = ref Vector256<float> destBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest)); ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
for (int i = 0; i < n; i++) int n = dest.Length / Vector256<float>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{
ref Vector256<float> vd0 = ref Unsafe.Add(ref destBase, i);
ref Vector256<float> vs0 = ref Unsafe.Add(ref sourceBase, i);
vd0 = Avx.Permute(vs0, control);
Unsafe.Add(ref vd0, 1) = Avx.Permute(Unsafe.Add(ref vs0, 1), control);
Unsafe.Add(ref vd0, 2) = Avx.Permute(Unsafe.Add(ref vs0, 2), control);
Unsafe.Add(ref vd0, 3) = Avx.Permute(Unsafe.Add(ref vs0, 3), control);
}
if (m > 0)
{ {
Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control); for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control);
}
} }
} }
else else
{ {
// Sse // Sse
int n = dest.Length / Vector128<float>.Count;
ref Vector128<float> sourceBase = ref Vector128<float> sourceBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source)); ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
ref Vector128<float> destBase = ref Vector128<float> destBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest)); ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
for (int i = 0; i < n; i++) int n = dest.Length / Vector128<float>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{ {
Vector128<float> vs = Unsafe.Add(ref sourceBase, i); ref Vector128<float> vd0 = ref Unsafe.Add(ref destBase, i);
Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control); ref Vector128<float> vs0 = ref Unsafe.Add(ref sourceBase, i);
vd0 = Sse.Shuffle(vs0, vs0, control);
Vector128<float> vs1 = Unsafe.Add(ref vs0, 1);
Unsafe.Add(ref vd0, 1) = Sse.Shuffle(vs1, vs1, control);
Vector128<float> vs2 = Unsafe.Add(ref vs0, 2);
Unsafe.Add(ref vd0, 2) = Sse.Shuffle(vs2, vs2, control);
Vector128<float> vs3 = Unsafe.Add(ref vs0, 3);
Unsafe.Add(ref vd0, 3) = Sse.Shuffle(vs3, vs3, control);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Vector128<float> vs = Unsafe.Add(ref sourceBase, i);
Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control);
}
} }
} }
} }
@ -146,8 +184,6 @@ namespace SixLabors.ImageSharp
{ {
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
int n = dest.Length / Vector256<byte>.Count;
// I've chosen to do this for convenience while we determine what // I've chosen to do this for convenience while we determine what
// shuffle controls to add to the library. // shuffle controls to add to the library.
// We can add static ROS instances if need be in the future. // We can add static ROS instances if need be in the future.
@ -161,16 +197,32 @@ namespace SixLabors.ImageSharp
ref Vector256<byte> destBase = ref Vector256<byte> destBase =
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest)); ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
for (int i = 0; i < n; i++) int n = dest.Length / Vector256<byte>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{ {
Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); ref Vector256<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
ref Vector256<byte> vd0 = ref Unsafe.Add(ref destBase, i);
vd0 = Avx2.Shuffle(vs0, vcm);
Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vcm);
Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vcm);
Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vcm);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm);
}
} }
} }
else else
{ {
// Ssse3 // Ssse3
int n = dest.Length / Vector128<byte>.Count;
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count]; Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control); Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vcm = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes)); Vector128<byte> vcm = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
@ -181,10 +233,27 @@ namespace SixLabors.ImageSharp
ref Vector128<byte> destBase = ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest)); ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
for (int i = 0; i < n; i++) int n = dest.Length / Vector128<byte>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{ {
Vector128<byte> vs = Unsafe.Add(ref sourceBase, i); ref Vector128<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(vs, vcm); ref Vector128<byte> vd0 = ref Unsafe.Add(ref destBase, i);
vd0 = Ssse3.Shuffle(vs0, vcm);
Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vcm);
Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vcm);
Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vcm);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vcm);
}
} }
} }
} }

2
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@ -229,7 +229,7 @@ namespace SixLabors.ImageSharp
public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2;
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static byte MmShuffle(int p3, int p2, int p1, int p0) public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0)
=> (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0);
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]

Loading…
Cancel
Save