Browse Source

Unroll loops

js/color-alpha-handling
James Jackson-South 6 years ago
parent
commit
956d1a3c77
  1. 105
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  2. 2
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

105
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -106,34 +106,72 @@ namespace SixLabors.ImageSharp
{
if (Avx.IsSupported)
{
int n = dest.Length / Vector256<float>.Count;
ref Vector256<float> sourceBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
ref Vector256<float> destBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
for (int i = 0; i < n; i++)
int n = dest.Length / Vector256<float>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{
ref Vector256<float> vd0 = ref Unsafe.Add(ref destBase, i);
ref Vector256<float> vs0 = ref Unsafe.Add(ref sourceBase, i);
vd0 = Avx.Permute(vs0, control);
Unsafe.Add(ref vd0, 1) = Avx.Permute(Unsafe.Add(ref vs0, 1), control);
Unsafe.Add(ref vd0, 2) = Avx.Permute(Unsafe.Add(ref vs0, 2), control);
Unsafe.Add(ref vd0, 3) = Avx.Permute(Unsafe.Add(ref vs0, 3), control);
}
if (m > 0)
{
Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control);
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control);
}
}
}
else
{
// Sse
int n = dest.Length / Vector128<float>.Count;
ref Vector128<float> sourceBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
ref Vector128<float> destBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
for (int i = 0; i < n; i++)
int n = dest.Length / Vector128<float>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{
Vector128<float> vs = Unsafe.Add(ref sourceBase, i);
Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control);
ref Vector128<float> vd0 = ref Unsafe.Add(ref destBase, i);
ref Vector128<float> vs0 = ref Unsafe.Add(ref sourceBase, i);
vd0 = Sse.Shuffle(vs0, vs0, control);
Vector128<float> vs1 = Unsafe.Add(ref vs0, 1);
Unsafe.Add(ref vd0, 1) = Sse.Shuffle(vs1, vs1, control);
Vector128<float> vs2 = Unsafe.Add(ref vs0, 2);
Unsafe.Add(ref vd0, 2) = Sse.Shuffle(vs2, vs2, control);
Vector128<float> vs3 = Unsafe.Add(ref vs0, 3);
Unsafe.Add(ref vd0, 3) = Sse.Shuffle(vs3, vs3, control);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Vector128<float> vs = Unsafe.Add(ref sourceBase, i);
Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control);
}
}
}
}
@ -146,8 +184,6 @@ namespace SixLabors.ImageSharp
{
if (Avx2.IsSupported)
{
int n = dest.Length / Vector256<byte>.Count;
// I've chosen to do this for convenience while we determine what
// shuffle controls to add to the library.
// We can add static ReadOnlySpan<byte> (ROS) instances if need be in the future.
@ -161,16 +197,32 @@ namespace SixLabors.ImageSharp
ref Vector256<byte> destBase =
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
for (int i = 0; i < n; i++)
int n = dest.Length / Vector256<byte>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{
Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm);
ref Vector256<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
ref Vector256<byte> vd0 = ref Unsafe.Add(ref destBase, i);
vd0 = Avx2.Shuffle(vs0, vcm);
Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vcm);
Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vcm);
Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vcm);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm);
}
}
}
else
{
// Ssse3
int n = dest.Length / Vector128<byte>.Count;
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vcm = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
@ -181,10 +233,27 @@ namespace SixLabors.ImageSharp
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
for (int i = 0; i < n; i++)
int n = dest.Length / Vector128<byte>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{
Vector128<byte> vs = Unsafe.Add(ref sourceBase, i);
Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(vs, vcm);
ref Vector128<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
ref Vector128<byte> vd0 = ref Unsafe.Add(ref destBase, i);
vd0 = Ssse3.Shuffle(vs0, vcm);
Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vcm);
Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vcm);
Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vcm);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vcm);
}
}
}
}

2
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@ -229,7 +229,7 @@ namespace SixLabors.ImageSharp
public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2;
[MethodImpl(InliningOptions.ShortMethod)]
public static byte MmShuffle(int p3, int p2, int p1, int p0)
public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0)
=> (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0);
[MethodImpl(InliningOptions.ShortMethod)]

Loading…
Cancel
Save