Browse Source

Update based on feedback

js/color-alpha-handling
James Jackson-South 5 years ago
parent
commit
aa20c09c48
  1. 165
      src/ImageSharp/Common/Helpers/IComponentShuffle.cs
  2. 142
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
  3. 13
      src/ImageSharp/PixelFormats/Utils/PixelConverter.cs
  4. 2
      tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs
  5. 2
      tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs
  6. 11
      tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs
  7. 93
      tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

165
src/ImageSharp/Common/Helpers/IComponentShuffle.cs

@ -0,0 +1,165 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
/// <summary>
/// Defines the contract for methods that allow the shuffling of pixel components.
/// An implementation supplies both the packed control byte that is handed to the
/// hardware accelerated path and a scalar fallback that is used for any data
/// that path did not process, e.g. on platforms that do not support Hardware Intrinsics.
/// </summary>
internal interface IComponentShuffle
{
/// <summary>
/// Gets the shuffle control: four 2-bit component indices packed into a single byte
/// using the layout produced by <c>SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0)</c>.
/// </summary>
byte Control { get; }
/// <summary>
/// Shuffles the 8-bit components of each consecutive 4-byte group in <paramref name="source"/>
/// according to <see cref="Control"/> without using hardware intrinsics
/// and stores the results in <paramref name="dest"/>.
/// </summary>
/// <remarks>
/// The implementations in this file access memory through unchecked references and do not
/// verify that <paramref name="dest"/> is large enough; callers are expected to pass a
/// destination that is at least as long as <paramref name="source"/>.
/// </remarks>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest);
}
/// <summary>
/// General purpose 4-component shuffle that works for any control byte by copying
/// one component at a time. Use it when no specialised shuffle struct exists for the
/// required component order.
/// </summary>
internal readonly struct DefaultShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle4"/> struct from four
    /// component indices, packed via <c>SimdUtils.Shuffle.MmShuffle</c>.
    /// </summary>
    /// <param name="p3">The value packed into bits 6-7 of the control.</param>
    /// <param name="p2">The value packed into bits 4-5 of the control.</param>
    /// <param name="p1">The value packed into bits 2-3 of the control.</param>
    /// <param name="p0">The value packed into bits 0-1 of the control.</param>
    public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
        : this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0))
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle4"/> struct
    /// from an already packed control byte.
    /// </summary>
    /// <param name="control">The packed shuffle control.</param>
    public DefaultShuffle4(byte control) => this.Control = control;

    /// <summary>
    /// Gets the shuffle control.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Shuffles each complete 4-byte group of <paramref name="source"/> into
    /// <paramref name="dest"/> using the offsets unpacked from <see cref="Control"/>.
    /// </summary>
    /// <remarks>
    /// Trailing bytes that do not form a complete group of four are ignored, matching
    /// the sibling shuffles that operate on whole 32-bit words. The destination length
    /// is not checked; it must be at least as long as the processed part of the source.
    /// </remarks>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(dest);

        SimdUtils.Shuffle.InverseMmShuffle(
            this.Control,
            out int p3,
            out int p2,
            out int p1,
            out int p0);

        // Unsafe.Add performs no bounds checking, so the loop must never start a
        // group that is not fully contained in the source. Rounding the length down
        // to a multiple of four guarantees that every access below stays in range.
        int length = source.Length & ~3;

        for (int i = 0; i < length; i += 4)
        {
            // Destination component k of the group receives source component p{k}.
            Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i);
            Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
            Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
            Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
        }
    }
}
/// <summary>
/// Shuffle with the control <c>MmShuffle(2, 1, 0, 3)</c>. The fallback performs it
/// as an 8-bit left rotation of every 32-bit group.
/// </summary>
internal readonly struct WXYZShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Gets the shuffle control.
    /// </summary>
    public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);

    /// <summary>
    /// Rotates every 32-bit word of <paramref name="source"/> left by 8 bits
    /// and writes it to the same word position in <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        // View both buffers as 32-bit words so each group of four components
        // is handled with a single load and a single store.
        ReadOnlySpan<uint> sourceWords = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> destWords = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint sourceRef = ref MemoryMarshal.GetReference(sourceWords);
        ref uint destRef = ref MemoryMarshal.GetReference(destWords);
        int wordCount = sourceWords.Length;

        // The shift/or pair below is the rotation idiom that the JIT can
        // recognise and emit as a single rotate instruction:
        // https://github.com/dotnet/coreclr/pull/1830
        for (int w = 0; w < wordCount; w++)
        {
            uint word = Unsafe.Add(ref sourceRef, w);

            // Bytes listed from most to least significant:
            // word          = [W Z Y X]
            // ROTL(8, word) = [Z Y X W]
            uint rotated = (word << 8) | (word >> 24);
            Unsafe.Add(ref destRef, w) = rotated;
        }
    }
}
/// <summary>
/// Shuffle with the control <c>MmShuffle(0, 1, 2, 3)</c>. The fallback performs it
/// by reversing the byte order of every 32-bit group.
/// </summary>
internal readonly struct WZYXShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Gets the shuffle control.
    /// </summary>
    public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);

    /// <summary>
    /// Reverses the four bytes of every 32-bit word of <paramref name="source"/>
    /// and writes it to the same word position in <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        // View both buffers as 32-bit words so each group of four components
        // is handled with a single load and a single store.
        ReadOnlySpan<uint> sourceWords = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> destWords = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint sourceRef = ref MemoryMarshal.GetReference(sourceWords);
        ref uint destRef = ref MemoryMarshal.GetReference(destWords);
        int wordCount = sourceWords.Length;

        for (int w = 0; w < wordCount; w++)
        {
            uint word = Unsafe.Add(ref sourceRef, w);

            // Bytes listed from most to least significant:
            // word          = [W Z Y X]
            // REVERSE(word) = [X Y Z W]
            uint reversed = BinaryPrimitives.ReverseEndianness(word);
            Unsafe.Add(ref destRef, w) = reversed;
        }
    }
}
/// <summary>
/// Shuffle with the control <c>MmShuffle(0, 3, 2, 1)</c>. The fallback performs it
/// as an 8-bit right rotation of every 32-bit group.
/// </summary>
internal readonly struct YZWXShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Gets the shuffle control.
    /// </summary>
    public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);

    /// <summary>
    /// Rotates every 32-bit word of <paramref name="source"/> right by 8 bits
    /// and writes it to the same word position in <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        // View both buffers as 32-bit words so each group of four components
        // is handled with a single load and a single store.
        ReadOnlySpan<uint> sourceWords = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> destWords = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint sourceRef = ref MemoryMarshal.GetReference(sourceWords);
        ref uint destRef = ref MemoryMarshal.GetReference(destWords);
        int wordCount = sourceWords.Length;

        for (int w = 0; w < wordCount; w++)
        {
            uint word = Unsafe.Add(ref sourceRef, w);

            // Bytes listed from most to least significant:
            // word          = [W Z Y X]
            // ROTR(8, word) = [X W Z Y]
            uint rotated = (word >> 8) | (word << 24);
            Unsafe.Add(ref destRef, w) = rotated;
        }
    }
}
/// <summary>
/// Shuffle with the control <c>MmShuffle(3, 0, 1, 2)</c>. The fallback performs it
/// by swapping the lowest and third bytes of every 32-bit group while leaving the
/// other two bytes in place.
/// </summary>
internal readonly struct ZYXWShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Gets the shuffle control.
    /// </summary>
    public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);

    /// <summary>
    /// For every 32-bit word of <paramref name="source"/>, exchanges the bytes selected by
    /// the mask <c>0x00FF00FF</c> and writes the result to the same word position
    /// in <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        const uint KeepMask = 0xFF00FF00;
        const uint SwapMask = 0x00FF00FF;

        // View both buffers as 32-bit words so each group of four components
        // is handled with a single load and a single store.
        ReadOnlySpan<uint> sourceWords = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> destWords = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint sourceRef = ref MemoryMarshal.GetReference(sourceWords);
        ref uint destRef = ref MemoryMarshal.GetReference(destWords);
        int wordCount = sourceWords.Length;

        for (int w = 0; w < wordCount; w++)
        {
            uint word = Unsafe.Add(ref sourceRef, w);

            // Bytes listed from most to least significant:
            // word                 = [W Z Y X]
            // kept                 = [W 0 Y 0]
            // swapping             = [0 Z 0 X]
            // ROTL(16, swapping)   = [0 X 0 Z]
            // kept | rotated       = [W X Y Z]
            uint kept = word & KeepMask;
            uint swapping = word & SwapMask;
            uint swapped = (swapping << 16) | (swapping >> 16);

            // The two halves occupy disjoint bits, so OR yields the same value as addition.
            Unsafe.Add(ref destRef, w) = kept | swapped;
        }
    }
}
}

142
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@ -2,7 +2,6 @@
// Licensed under the Apache License, Version 2.0.
using System;
using System.Buffers.Binary;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
@ -40,34 +39,32 @@ namespace SixLabors.ImageSharp
}
/// <summary>
/// Shuffle 8-bit integers in a within 128-bit lanes in <paramref name="source"/>
/// Shuffle 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Channel(
public static void Shuffle4Channel<TShuffle>(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
TShuffle shuffle)
where TShuffle : struct, IComponentShuffle
{
VerifyShuffleSpanInput(source, dest);
// TODO: There doesn't seem to be any APIs for
// System.Numerics that allow shuffling.
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control);
HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
ShuffleRemainder4Channel(source, dest, control);
shuffle.RunFallbackShuffle(source, dest);
}
}
[MethodImpl(InliningOptions.ColdPath)]
public static void ShuffleRemainder4Channel(
ReadOnlySpan<float> source,
Span<float> dest,
@ -86,125 +83,6 @@ namespace SixLabors.ImageSharp
}
}
[MethodImpl(InliningOptions.ColdPath)]
public static void ShuffleRemainder4Channel(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
{
#if NETCOREAPP
// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
// and ROTR (Rotate Right) emitting efficient CPU instructions:
// https://github.com/dotnet/coreclr/pull/1830
switch (control)
{
case Shuffle.WXYZ:
WXYZ(source, dest);
return;
case Shuffle.WZYX:
WZYX(source, dest);
return;
case Shuffle.YZWX:
YZWX(source, dest);
return;
case Shuffle.ZYXW:
ZYXW(source, dest);
return;
}
#endif
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);
for (int i = 0; i < source.Length; i += 4)
{
Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i);
Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void WXYZ(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
for (int i = 0; i < s.Length; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// ROTL(8, packed) = [Z Y X W]
Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24);
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void ZYXW(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
for (int i = 0; i < s.Length; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// tmp1 = [W 0 Y 0]
// tmp2 = [0 Z 0 X]
// tmp3=ROTL(16, tmp2) = [0 X 0 Z]
// tmp1 + tmp3 = [W X Y Z]
uint tmp1 = packed & 0xFF00FF00;
uint tmp2 = packed & 0x00FF00FF;
uint tmp3 = (tmp2 << 16) | (tmp2 >> 16);
Unsafe.Add(ref dBase, i) = tmp1 + tmp3;
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void WZYX(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
for (int i = 0; i < s.Length; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// REVERSE(packedArgb) = [X Y Z W]
Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed);
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void YZWX(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
for (int i = 0; i < s.Length; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// ROTR(8, packedArgb) = [Y Z W X]
Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24);
}
}
[Conditional("DEBUG")]
private static void VerifyShuffleSpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
where T : struct
@ -222,12 +100,6 @@ namespace SixLabors.ImageSharp
public static class Shuffle
{
public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3;
public const byte WZYX = (0 << 6) | (1 << 4) | (2 << 2) | 3;
public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0;
public const byte YZWX = (0 << 6) | (3 << 4) | (2 << 2) | 1;
public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2;
[MethodImpl(InliningOptions.ShortMethod)]
public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0)
=> (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0);

13
src/ImageSharp/PixelFormats/Utils/PixelConverter.cs

@ -2,7 +2,6 @@
// Licensed under the Apache License, Version 2.0.
using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.PixelFormats.Utils
@ -28,7 +27,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WXYZ);
=> SimdUtils.Shuffle4Channel<WXYZShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
@ -37,7 +36,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW);
=> SimdUtils.Shuffle4Channel<ZYXWShuffle4>(source, dest, default);
}
public static class FromArgb32
@ -49,7 +48,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.YZWX);
=> SimdUtils.Shuffle4Channel<YZWXShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
@ -58,7 +57,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX);
=> SimdUtils.Shuffle4Channel<WZYXShuffle4>(source, dest, default);
}
public static class FromBgra32
@ -70,7 +69,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX);
=> SimdUtils.Shuffle4Channel<WZYXShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
@ -79,7 +78,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW);
=> SimdUtils.Shuffle4Channel<ZYXWShuffle4>(source, dest, default);
}
}
}

2
tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs

@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Benchmark]
public void Shuffle4Channel()
{
SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ);
SimdUtils.Shuffle4Channel<WXYZShuffle4>(this.source, this.destination, default);
}
}

2
tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs

@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Benchmark]
public void Shuffle4Channel()
{
SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ);
SimdUtils.Shuffle4Channel(this.source, this.destination, default(WXYZShuffle4).Control);
}
}

11
tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs

@ -58,6 +58,12 @@ namespace SixLabors.ImageSharp.Benchmarks
{
public HwIntrinsics_SSE_AVX()
{
this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31)
.WithEnvironmentVariables(
new EnvironmentVariable(EnableHWIntrinsic, Off),
new EnvironmentVariable(FeatureSIMD, Off))
.WithId("No HwIntrinsics"));
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
@ -72,11 +78,6 @@ namespace SixLabors.ImageSharp.Benchmarks
.WithId("SSE"));
}
#endif
this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31)
.WithEnvironmentVariables(
new EnvironmentVariable(EnableHWIntrinsic, Off),
new EnvironmentVariable(FeatureSIMD, Off))
.WithId("No HwIntrinsics"));
}
}
}

93
tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

@ -9,66 +9,91 @@ namespace SixLabors.ImageSharp.Tests.Common
{
public partial class SimdUtilsTests
{
public static readonly TheoryData<byte> ShuffleControls =
new TheoryData<byte>
{
SimdUtils.Shuffle.WXYZ,
SimdUtils.Shuffle.WZYX,
SimdUtils.Shuffle.XYZW,
SimdUtils.Shuffle.YZWX,
SimdUtils.Shuffle.ZYXW,
SimdUtils.Shuffle.MmShuffle(2, 1, 3, 0),
SimdUtils.Shuffle.MmShuffle(1, 1, 1, 1),
SimdUtils.Shuffle.MmShuffle(3, 3, 3, 3)
};
[Theory]
[MemberData(nameof(ShuffleControls))]
public void BulkShuffleFloat4Channel(byte control)
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void BulkShuffleFloat4Channel(int count)
{
static void RunTest(string serialized)
{
byte ctrl = FeatureTestRunner.Deserialize<byte>(serialized);
foreach (var item in ArraySizesDivisibleBy4)
{
foreach (var count in item)
{
TestShuffleFloat4Channel(
(int)count,
(s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl),
ctrl);
}
}
// No need to test multiple shuffle controls as the
// pipeline is always the same.
int size = FeatureTestRunner.Deserialize<int>(serialized);
byte control = default(WZYXShuffle4).Control;
TestShuffleFloat4Channel(
size,
(s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, control),
control);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
control,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE);
}
[Theory]
[MemberData(nameof(ShuffleControls))]
public void BulkShuffleByte4Channel(byte control)
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void BulkShuffleByte4Channel(int count)
{
static void RunTest(string serialized)
{
byte ctrl = FeatureTestRunner.Deserialize<byte>(serialized);
int size = FeatureTestRunner.Deserialize<int>(serialized);
foreach (var item in ArraySizesDivisibleBy4)
{
// These cannot be expressed as a theory as you cannot
// use RemoteExecutor within generic methods nor pass
// IComponentShuffle to the generic utils method.
foreach (var count in item)
{
WXYZShuffle4 wxyz = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wxyz),
wxyz.Control);
WZYXShuffle4 wzyx = default;
TestShuffleByte4Channel(
(int)count,
(s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl),
ctrl);
size,
(s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wzyx),
wzyx.Control);
YZWXShuffle4 yzwx = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yzwx),
yzwx.Control);
ZYXWShuffle4 zyxw = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, zyxw),
zyxw.Control);
var xwyz = new DefaultShuffle4(2, 1, 3, 0);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, xwyz),
xwyz.Control);
var yyyy = new DefaultShuffle4(1, 1, 1, 1);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yyyy),
yyyy.Control);
var wwww = new DefaultShuffle4(3, 3, 3, 3);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wwww),
wwww.Control);
}
}
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
control,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
}

Loading…
Cancel
Save