|
|
|
@ -2,6 +2,7 @@ |
|
|
|
// Licensed under the Apache License, Version 2.0.
|
|
|
|
|
|
|
|
using System; |
|
|
|
using System.Buffers.Binary; |
|
|
|
using System.Diagnostics; |
|
|
|
using System.Runtime.CompilerServices; |
|
|
|
using System.Runtime.InteropServices; |
|
|
|
@ -67,14 +68,53 @@ namespace SixLabors.ImageSharp |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ColdPath)] |
|
|
|
public static void ShuffleRemainder4Channel<T>( |
|
|
|
ReadOnlySpan<T> source, |
|
|
|
Span<T> dest, |
|
|
|
public static void ShuffleRemainder4Channel( |
|
|
|
ReadOnlySpan<float> source, |
|
|
|
Span<float> dest, |
|
|
|
byte control) |
|
|
|
{ |
|
|
|
ref float sBase = ref MemoryMarshal.GetReference(source); |
|
|
|
ref float dBase = ref MemoryMarshal.GetReference(dest); |
|
|
|
Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); |
|
|
|
|
|
|
|
for (int i = 0; i < source.Length; i += 4) |
|
|
|
{ |
|
|
|
Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); |
|
|
|
Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); |
|
|
|
Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); |
|
|
|
Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ColdPath)] |
|
|
|
public static void ShuffleRemainder4Channel( |
|
|
|
ReadOnlySpan<byte> source, |
|
|
|
Span<byte> dest, |
|
|
|
byte control) |
|
|
|
where T : struct |
|
|
|
{ |
|
|
|
ref T sBase = ref MemoryMarshal.GetReference(source); |
|
|
|
ref T dBase = ref MemoryMarshal.GetReference(dest); |
|
|
|
#if NETCOREAPP
|
|
|
|
// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
|
|
|
|
// and ROTR (Rotate Right) emitting efficient CPU instructions:
|
|
|
|
// https://github.com/dotnet/coreclr/pull/1830
|
|
|
|
switch (control) |
|
|
|
{ |
|
|
|
case Shuffle.WXYZ: |
|
|
|
WXYZ(source, dest); |
|
|
|
return; |
|
|
|
case Shuffle.WZYX: |
|
|
|
WZYX(source, dest); |
|
|
|
return; |
|
|
|
case Shuffle.YZWX: |
|
|
|
YZWX(source, dest); |
|
|
|
return; |
|
|
|
case Shuffle.ZYXW: |
|
|
|
ZYXW(source, dest); |
|
|
|
return; |
|
|
|
} |
|
|
|
#endif
|
|
|
|
|
|
|
|
ref byte sBase = ref MemoryMarshal.GetReference(source); |
|
|
|
ref byte dBase = ref MemoryMarshal.GetReference(dest); |
|
|
|
Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); |
|
|
|
|
|
|
|
for (int i = 0; i < source.Length; i += 4) |
|
|
|
@ -86,6 +126,85 @@ namespace SixLabors.ImageSharp |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
private static void WXYZ(ReadOnlySpan<byte> source, Span<byte> dest) |
|
|
|
{ |
|
|
|
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source); |
|
|
|
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest); |
|
|
|
ref uint sBase = ref MemoryMarshal.GetReference(s); |
|
|
|
ref uint dBase = ref MemoryMarshal.GetReference(d); |
|
|
|
|
|
|
|
for (int i = 0; i < s.Length; i++) |
|
|
|
{ |
|
|
|
uint packed = Unsafe.Add(ref sBase, i); |
|
|
|
|
|
|
|
// packed = [W Z Y X]
|
|
|
|
// ROTL(8, packed) = [Z Y X W]
|
|
|
|
Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
private static void ZYXW(ReadOnlySpan<byte> source, Span<byte> dest) |
|
|
|
{ |
|
|
|
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source); |
|
|
|
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest); |
|
|
|
ref uint sBase = ref MemoryMarshal.GetReference(s); |
|
|
|
ref uint dBase = ref MemoryMarshal.GetReference(d); |
|
|
|
|
|
|
|
for (int i = 0; i < s.Length; i++) |
|
|
|
{ |
|
|
|
uint packed = Unsafe.Add(ref sBase, i); |
|
|
|
|
|
|
|
// packed = [W Z Y X]
|
|
|
|
// tmp1 = [W 0 Y 0]
|
|
|
|
// tmp2 = [0 Z 0 X]
|
|
|
|
// tmp3=ROTL(16, tmp2) = [0 X 0 Z]
|
|
|
|
// tmp1 + tmp3 = [W X Y Z]
|
|
|
|
uint tmp1 = packed & 0xFF00FF00; |
|
|
|
uint tmp2 = packed & 0x00FF00FF; |
|
|
|
uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); |
|
|
|
|
|
|
|
Unsafe.Add(ref dBase, i) = tmp1 + tmp3; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
private static void WZYX(ReadOnlySpan<byte> source, Span<byte> dest) |
|
|
|
{ |
|
|
|
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source); |
|
|
|
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest); |
|
|
|
ref uint sBase = ref MemoryMarshal.GetReference(s); |
|
|
|
ref uint dBase = ref MemoryMarshal.GetReference(d); |
|
|
|
|
|
|
|
for (int i = 0; i < s.Length; i++) |
|
|
|
{ |
|
|
|
uint packed = Unsafe.Add(ref sBase, i); |
|
|
|
|
|
|
|
// packed = [W Z Y X]
|
|
|
|
// REVERSE(packedArgb) = [X Y Z W]
|
|
|
|
Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
private static void YZWX(ReadOnlySpan<byte> source, Span<byte> dest) |
|
|
|
{ |
|
|
|
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source); |
|
|
|
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest); |
|
|
|
ref uint sBase = ref MemoryMarshal.GetReference(s); |
|
|
|
ref uint dBase = ref MemoryMarshal.GetReference(d); |
|
|
|
|
|
|
|
for (int i = 0; i < s.Length; i++) |
|
|
|
{ |
|
|
|
uint packed = Unsafe.Add(ref sBase, i); |
|
|
|
|
|
|
|
// packed = [W Z Y X]
|
|
|
|
// ROTR(8, packedArgb) = [Y Z W X]
|
|
|
|
Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
[Conditional("DEBUG")] |
|
|
|
private static void VerifyShuffleSpanInput<T>(ReadOnlySpan<T> source, Span<T> dest) |
|
|
|
where T : struct |
|
|
|
@ -104,7 +223,9 @@ namespace SixLabors.ImageSharp |
|
|
|
public static class Shuffle |
|
|
|
{ |
|
|
|
public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3; |
|
|
|
public const byte WZYX = (0 << 6) | (1 << 4) | (2 << 2) | 3; |
|
|
|
public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; |
|
|
|
public const byte YZWX = (0 << 6) | (3 << 4) | (2 << 2) | 1; |
|
|
|
public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
|