Browse Source

Add optimized fallback for existing shuffles.

js/color-alpha-handling
James Jackson-South 6 years ago
parent
commit
84a1d1a28b
  1. 133
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
  2. 2
      tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

133
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0.
using System;
using System.Buffers.Binary;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
@ -67,14 +68,53 @@ namespace SixLabors.ImageSharp
}
[MethodImpl(InliningOptions.ColdPath)]
public static void ShuffleRemainder4Channel<T>(
ReadOnlySpan<T> source,
Span<T> dest,
public static void ShuffleRemainder4Channel(
ReadOnlySpan<float> source,
Span<float> dest,
byte control)
{
ref float sBase = ref MemoryMarshal.GetReference(source);
ref float dBase = ref MemoryMarshal.GetReference(dest);
Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);
for (int i = 0; i < source.Length; i += 4)
{
Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i);
Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
}
}
[MethodImpl(InliningOptions.ColdPath)]
public static void ShuffleRemainder4Channel(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
where T : struct
{
ref T sBase = ref MemoryMarshal.GetReference(source);
ref T dBase = ref MemoryMarshal.GetReference(dest);
#if NETCOREAPP
// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
// and ROTR (Rotate Right) emitting efficient CPU instructions:
// https://github.com/dotnet/coreclr/pull/1830
switch (control)
{
case Shuffle.WXYZ:
WXYZ(source, dest);
return;
case Shuffle.WZYX:
WZYX(source, dest);
return;
case Shuffle.YZWX:
YZWX(source, dest);
return;
case Shuffle.ZYXW:
ZYXW(source, dest);
return;
}
#endif
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);
for (int i = 0; i < source.Length; i += 4)
@ -86,6 +126,85 @@ namespace SixLabors.ImageSharp
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void WXYZ(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
for (int i = 0; i < s.Length; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// ROTL(8, packed) = [Z Y X W]
Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24);
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void ZYXW(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
for (int i = 0; i < s.Length; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// tmp1 = [W 0 Y 0]
// tmp2 = [0 Z 0 X]
// tmp3=ROTL(16, tmp2) = [0 X 0 Z]
// tmp1 + tmp3 = [W X Y Z]
uint tmp1 = packed & 0xFF00FF00;
uint tmp2 = packed & 0x00FF00FF;
uint tmp3 = (tmp2 << 16) | (tmp2 >> 16);
Unsafe.Add(ref dBase, i) = tmp1 + tmp3;
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void WZYX(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
for (int i = 0; i < s.Length; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// REVERSE(packedArgb) = [X Y Z W]
Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed);
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void YZWX(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
for (int i = 0; i < s.Length; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// ROTR(8, packedArgb) = [Y Z W X]
Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24);
}
}
[Conditional("DEBUG")]
private static void VerifyShuffleSpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
where T : struct
@ -104,7 +223,9 @@ namespace SixLabors.ImageSharp
public static class Shuffle
{
public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3;
public const byte WZYX = (0 << 6) | (1 << 4) | (2 << 2) | 3;
public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0;
public const byte YZWX = (0 << 6) | (3 << 4) | (2 << 2) | 1;
public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2;
[MethodImpl(InliningOptions.ShortMethod)]

2
tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

@ -13,7 +13,9 @@ namespace SixLabors.ImageSharp.Tests.Common
new TheoryData<byte>
{
SimdUtils.Shuffle.WXYZ,
SimdUtils.Shuffle.WZYX,
SimdUtils.Shuffle.XYZW,
SimdUtils.Shuffle.YZWX,
SimdUtils.Shuffle.ZYXW,
SimdUtils.Shuffle.MmShuffle(2, 1, 3, 0),
SimdUtils.Shuffle.MmShuffle(1, 1, 1, 1),

Loading…
Cancel
Save