Browse Source

Add 4 channel float shuffling.

js/color-alpha-handling
James Jackson-South 6 years ago
parent
commit
893247bd88
  1. 80
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  2. 131
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
  3. 68
      tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs
  4. 1
      tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj
  5. 75
      tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs
  6. 14
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
  7. 2
      tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs
  8. 21
      tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs
  9. 4
      tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs

80
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -18,6 +18,86 @@ namespace SixLabors.ImageSharp
public static ReadOnlySpan<byte> PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };
/// <summary>
/// Shuffles the leading part of <paramref name="source"/> into <paramref name="dest"/> using
/// hardware intrinsics, then slices both spans past the elements that were processed so the
/// caller can deal with whatever is left over.
/// </summary>
/// <param name="source">The source span of floats. Advanced past the processed elements on return.</param>
/// <param name="dest">The destination span of floats. Advanced by the same amount on return.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4ChannelReduce(
    ref ReadOnlySpan<float> source,
    ref Span<float> dest,
    byte control)
{
    // Neither instruction set is available: leave both spans untouched.
    if (!Avx.IsSupported && !Sse.IsSupported)
    {
        return;
    }

    // Prefer the wider vector whenever AVX is present.
    int vectorWidth = Avx.IsSupported
        ? Vector256<float>.Count
        : Vector128<float>.Count;

    int leftover = ImageMaths.ModuloP2(source.Length, vectorWidth);
    int vectorizable = source.Length - leftover;
    if (vectorizable <= 0)
    {
        return;
    }

    Shuffle4Channel(
        source.Slice(0, vectorizable),
        dest.Slice(0, vectorizable),
        control);

    // Hand the unprocessed tail back to the caller through the ref parameters.
    source = source.Slice(vectorizable);
    dest = dest.Slice(vectorizable);
}
/// <summary>
/// Shuffles <paramref name="source"/> into <paramref name="dest"/> one hardware vector at a time,
/// using <see cref="Avx.Permute(Vector256{float}, byte)"/> when AVX is supported and
/// <see cref="Sse.Shuffle(Vector128{float}, Vector128{float}, byte)"/> otherwise.
/// Only whole vectors are processed; the vector count is derived from the length of <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of floats.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4Channel(
    ReadOnlySpan<float> source,
    Span<float> dest,
    byte control)
{
    ref float sourceRef = ref MemoryMarshal.GetReference(source);
    ref float destRef = ref MemoryMarshal.GetReference(dest);

    if (Avx.IsSupported)
    {
        ref Vector256<float> source256 = ref Unsafe.As<float, Vector256<float>>(ref sourceRef);
        ref Vector256<float> dest256 = ref Unsafe.As<float, Vector256<float>>(ref destRef);

        int vectors256 = dest.Length / Vector256<float>.Count;
        for (int v = 0; v < vectors256; v++)
        {
            Vector256<float> loaded = Unsafe.Add(ref source256, v);
            Unsafe.Add(ref dest256, v) = Avx.Permute(loaded, control);
        }

        return;
    }

    // Sse: passing the same vector as both operands shuffles within that single vector.
    ref Vector128<float> source128 = ref Unsafe.As<float, Vector128<float>>(ref sourceRef);
    ref Vector128<float> dest128 = ref Unsafe.As<float, Vector128<float>>(ref destRef);

    int vectors128 = dest.Length / Vector128<float>.Count;
    for (int v = 0; v < vectors128; v++)
    {
        Vector128<float> loaded = Unsafe.Add(ref source128, v);
        Unsafe.Add(ref dest128, v) = Sse.Shuffle(loaded, loaded, control);
    }
}
/// <summary>
/// Performs a multiplication and an addition of the <see cref="Vector256{T}"/>.
/// </summary>

131
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@ -0,0 +1,131 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
/// <summary>
/// Reorders the single-precision (32-bit) floating-point elements of <paramref name="source"/>
/// in groups of four according to <paramref name="control"/>, writing the result to
/// <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of floats.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Channel(
    ReadOnlySpan<float> source,
    Span<float> dest,
    byte control)
{
    VerifyShuffleSpanInput(source, dest);

    // TODO: System.Numerics does not appear to expose any shuffling API,
    // so only the hardware-intrinsics path is accelerated.
#if SUPPORTS_RUNTIME_INTRINSICS
    HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control);
#endif

    // The reduce step slices off what it processed; nothing left means nothing to do.
    if (source.Length <= 0)
    {
        return;
    }

    ShuffleRemainder4Channel(source, dest, control);
}
/// <summary>
/// Scalar implementation of the 4 channel float shuffle. Walks <paramref name="source"/> in
/// groups of four elements and writes each group to <paramref name="dest"/> in the order
/// decoded from <paramref name="control"/>.
/// </summary>
/// <remarks>
/// Each group is fully read before any of it is written, so the result is the same whether
/// or not <paramref name="source"/> and <paramref name="dest"/> refer to the same memory.
/// No bounds checks are performed: the length of <paramref name="source"/> is expected to be
/// a multiple of 4 and <paramref name="dest"/> is expected to be at least as long.
/// </remarks>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of floats.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ColdPath)]
public static void ShuffleRemainder4Channel(
    ReadOnlySpan<float> source,
    Span<float> dest,
    byte control)
{
    ref float sBase = ref MemoryMarshal.GetReference(source);
    ref float dBase = ref MemoryMarshal.GetReference(dest);
    Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);

    for (int i = 0; i < source.Length; i += 4)
    {
        // Load the whole group first. Interleaving loads and stores would read
        // already-overwritten channels when shuffling a buffer in place.
        float c0 = Unsafe.Add(ref sBase, p0 + i);
        float c1 = Unsafe.Add(ref sBase, p1 + i);
        float c2 = Unsafe.Add(ref sBase, p2 + i);
        float c3 = Unsafe.Add(ref sBase, p3 + i);

        Unsafe.Add(ref dBase, i) = c0;
        Unsafe.Add(ref dBase, i + 1) = c1;
        Unsafe.Add(ref dBase, i + 2) = c2;
        Unsafe.Add(ref dBase, i + 3) = c3;
    }
}
/// <summary>
/// Debug-only validation of the spans passed to the shuffle methods: both must have the
/// same length and that length must be a multiple of 4.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of floats.</param>
[Conditional("DEBUG")]
private static void VerifyShuffleSpanInput(ReadOnlySpan<float> source, Span<float> dest)
{
    int sourceLength = source.Length;

    bool lengthsMatch = sourceLength == dest.Length;
    DebugGuard.IsTrue(lengthsMatch, nameof(source), "Input spans must be of same length!");

    bool wholeGroupsOnly = sourceLength % 4 == 0;
    DebugGuard.IsTrue(wholeGroupsOnly, nameof(source), "Input spans must be divisiable by 4!");
}
/// <summary>
/// Shuffle control constants and helpers. A control byte packs four 2-bit element indices
/// as <c>(p3 &lt;&lt; 6) | (p2 &lt;&lt; 4) | (p1 &lt;&lt; 2) | p0</c>.
/// </summary>
public static class Shuffle
{
    public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3;
    public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0;
    public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2;

    // The byte masks are built once and cached so that reading a property
    // does not allocate and refill a new array on every access.
    private static readonly byte[] Wxyz128Mask = CreateByteMask(16, 2, 1, 0, 3);
    private static readonly byte[] Xyzw128Mask = CreateByteMask(16, 3, 2, 1, 0);
    private static readonly byte[] Zyxw128Mask = CreateByteMask(16, 3, 0, 1, 2);
    private static readonly byte[] Wxyz256Mask = CreateByteMask(32, 2, 1, 0, 3);
    private static readonly byte[] Xyzw256Mask = CreateByteMask(32, 3, 2, 1, 0);
    private static readonly byte[] Zyxw256Mask = CreateByteMask(32, 3, 0, 1, 2);

    /// <summary>Gets the 16 byte mask equivalent of <see cref="WXYZ"/>.</summary>
    public static ReadOnlySpan<byte> WXYZ_128 => Wxyz128Mask;

    /// <summary>Gets the 16 byte mask equivalent of <see cref="XYZW"/>.</summary>
    public static ReadOnlySpan<byte> XYZW_128 => Xyzw128Mask;

    /// <summary>Gets the 16 byte mask equivalent of <see cref="ZYXW"/>.</summary>
    public static ReadOnlySpan<byte> ZYXW_128 => Zyxw128Mask;

    /// <summary>Gets the 32 byte mask equivalent of <see cref="WXYZ"/>.</summary>
    public static ReadOnlySpan<byte> WXYZ_256 => Wxyz256Mask;

    /// <summary>Gets the 32 byte mask equivalent of <see cref="XYZW"/>.</summary>
    public static ReadOnlySpan<byte> XYZW_256 => Xyzw256Mask;

    /// <summary>Gets the 32 byte mask equivalent of <see cref="ZYXW"/>.</summary>
    public static ReadOnlySpan<byte> ZYXW_256 => Zyxw256Mask;

    /// <summary>
    /// Splits a shuffle control byte into its four 2-bit element indices.
    /// </summary>
    /// <param name="control">The byte control.</param>
    /// <param name="p3">The index stored in bits 7-6.</param>
    /// <param name="p2">The index stored in bits 5-4.</param>
    /// <param name="p1">The index stored in bits 3-2.</param>
    /// <param name="p0">The index stored in bits 1-0.</param>
    public static void InverseMmShuffle(byte control, out int p3, out int p2, out int p1, out int p0)
    {
        p3 = control >> 6 & 0x3;
        p2 = control >> 4 & 0x3;
        p1 = control >> 2 & 0x3;
        p0 = control >> 0 & 0x3;
    }

    /// <summary>
    /// Builds a byte mask of <paramref name="length"/> bytes in which every group of four
    /// bytes holds <paramref name="p0"/>..<paramref name="p3"/> (in that order) offset by
    /// the index at which the group starts.
    /// </summary>
    private static byte[] CreateByteMask(int length, int p3, int p2, int p1, int p0)
    {
        byte[] result = new byte[length];
        for (int i = 0; i < result.Length; i += 4)
        {
            result[i] = (byte)(p0 + i);
            result[i + 1] = (byte)(p1 + i);
            result[i + 2] = (byte)(p2 + i);
            result[i + 3] = (byte)(p3 + i);
        }

        return result;
    }
}
}
}

68
tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs

@ -0,0 +1,68 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Tests;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
/// <summary>
/// Benchmarks <c>SimdUtils.Shuffle4Channel</c> over float buffers of several sizes using the
/// <c>HwIntrinsics_SSE_AVX</c> benchmark configuration.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class ShuffleFloat4Channel
{
    private float[] source;
    private float[] destination;

    /// <summary>
    /// Gets or sets the number of floats in the source and destination buffers.
    /// </summary>
    [Params(128, 256, 512, 1024, 2048)]
    public int Count { get; set; }

    /// <summary>
    /// Allocates the buffers; the random source data is seeded with the element count.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        int length = this.Count;
        this.destination = new float[length];
        this.source = new Random(length).GenerateRandomFloatArray(length, 0, 256);
    }

    [Benchmark]
    public void Shuffle4Channel()
        => SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ);
}
// 2020-10-26
// ##########
//
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
// Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK = 5.0.100-rc.2.20479.15
//
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------- |---------------- |-------------------------------------------------- |------ |------------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:|
// | Shuffle4Channel | AVX | Empty | 128 | 14.49 ns | 0.244 ns | 0.217 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 87.74 ns | 0.524 ns | 0.490 ns | 6.06 | 0.09 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 23.65 ns | 0.101 ns | 0.094 ns | 1.63 | 0.03 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 256 | 25.87 ns | 0.492 ns | 0.673 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 159.52 ns | 0.901 ns | 0.843 ns | 6.12 | 0.12 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 45.47 ns | 0.404 ns | 0.378 ns | 1.75 | 0.03 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 512 | 49.51 ns | 0.088 ns | 0.083 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 297.96 ns | 0.926 ns | 0.821 ns | 6.02 | 0.02 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 90.77 ns | 0.191 ns | 0.169 ns | 1.83 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 1024 | 113.09 ns | 1.913 ns | 3.090 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 604.58 ns | 1.464 ns | 1.298 ns | 5.29 | 0.18 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 179.44 ns | 0.208 ns | 0.184 ns | 1.57 | 0.05 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 2048 | 217.95 ns | 1.314 ns | 1.165 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 1,152.04 ns | 3.941 ns | 3.494 ns | 5.29 | 0.03 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 349.52 ns | 0.587 ns | 0.520 ns | 1.60 | 0.01 | - | - | - | - |
}

1
tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj

@ -17,6 +17,7 @@
<ItemGroup>
<Compile Include="..\ImageSharp.Tests\TestImages.cs" Link="Tests\TestImages.cs" />
<Compile Include="..\ImageSharp.Tests\TestUtilities\TestEnvironment.cs" Link="Tests\TestEnvironment.cs" />
<Compile Include="..\ImageSharp.Tests\TestUtilities\TestDataGenerator.cs" Link="Tests\TestDataGenerator.cs" />
</ItemGroup>
<ItemGroup>

75
tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

@ -0,0 +1,75 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Common
{
public partial class SimdUtilsTests
{
    /// <summary>
    /// The shuffle control bytes exercised by the bulk shuffle tests.
    /// </summary>
    public static readonly TheoryData<byte> ShuffleControls =
        new TheoryData<byte>
        {
            SimdUtils.Shuffle.WXYZ,
            SimdUtils.Shuffle.XYZW,
            SimdUtils.Shuffle.ZYXW
        };

    [Theory]
    [MemberData(nameof(ShuffleControls))]
    public void BulkShuffleFloat4Channel(byte control)
    {
        static void RunTest(string serialized)
        {
            byte ctrl = FeatureTestRunner.Deserialize<byte>(serialized);
            foreach (var item in ArraySizesDivisibleBy4)
            {
                foreach (var count in item)
                {
                    TestShuffle(
                        (int)count,
                        (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl),
                        ctrl);
                }
            }
        }

        // The float shuffle picks its code path from Avx.IsSupported and Sse.IsSupported.
        // Disabling AVX2 leaves Avx.IsSupported true, so AVX itself has to be disabled
        // for the Sse branch to run.
        // NOTE(review): assumes HwIntrinsics declares DisableAVX alongside DisableAVX2 - confirm.
        FeatureTestRunner.RunWithHwIntrinsicsFeature(
            RunTest,
            control,
            HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE);
    }

    /// <summary>
    /// Fills a source buffer with random floats, builds the expected output with a plain
    /// scalar loop and compares it with what <paramref name="convert"/> produces.
    /// </summary>
    /// <param name="count">The number of floats to shuffle; also used as the random seed.</param>
    /// <param name="convert">The shuffle under test, given the source and destination buffers.</param>
    /// <param name="control">The byte control used to build the expected output.</param>
    private static void TestShuffle(
        int count,
        Action<Memory<float>, Memory<float>> convert,
        byte control)
    {
        float[] source = new Random(count).GenerateRandomFloatArray(count, 0, 256);
        var result = new float[count];

        float[] expected = new float[count];

        SimdUtils.Shuffle.InverseMmShuffle(
            control,
            out int p3,
            out int p2,
            out int p1,
            out int p0);

        // Reference implementation: output element k of each group comes from source element pk.
        for (int i = 0; i < expected.Length; i += 4)
        {
            expected[i] = source[p0 + i];
            expected[i + 1] = source[p1 + i];
            expected[i + 2] = source[p2 + i];
            expected[i + 3] = source[p3 + i];
        }

        convert(source, result);

        Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5F));
    }
}
}

14
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -13,7 +13,7 @@ using Xunit.Abstractions;
namespace SixLabors.ImageSharp.Tests.Common
{
public class SimdUtilsTests
public partial class SimdUtilsTests
{
private ITestOutputHelper Output { get; }
@ -212,14 +212,14 @@ namespace SixLabors.ImageSharp.Tests.Common
static void RunTest(string serialized)
{
TestImpl_BulkConvertByteToNormalizedFloat(
FeatureTestRunner.Deserialize(serialized),
FeatureTestRunner.Deserialize<int>(serialized),
(s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span));
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41,
count);
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41);
}
#endif
@ -305,14 +305,14 @@ namespace SixLabors.ImageSharp.Tests.Common
static void RunTest(string serialized)
{
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
FeatureTestRunner.Deserialize(serialized),
FeatureTestRunner.Deserialize<int>(serialized),
(s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span));
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2,
count);
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
#endif

2
tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs

@ -535,7 +535,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
static void RunTest(string serialized)
{
TestImageProvider<Rgba32> provider =
FeatureTestRunner.Deserialize<TestImageProvider<Rgba32>>(serialized);
FeatureTestRunner.DeserializeForXunit<TestImageProvider<Rgba32>>(serialized);
foreach (PngInterlaceMode interlaceMode in InterlaceMode)
{

21
tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs

@ -29,17 +29,19 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities
/// <typeparam name="T">The type to deserialize to.</typeparam>
/// <param name="value">The string value to deserialize.</param>
/// <returns>The <typeparamref name="T"/> value.</returns>
public static T Deserialize<T>(string value)
public static T DeserializeForXunit<T>(string value)
where T : IXunitSerializable
=> BasicSerializer.Deserialize<T>(value);
/// <summary>
/// Allows the deserialization of integers passed to the feature test.
/// Allows the deserialization of types implementing <see cref="IConvertible"/>
/// passed to the feature test.
/// </summary>
/// <param name="value">The string value to deserialize.</param>
/// <returns>The <see cref="int"/> value.</returns>
public static int Deserialize(string value)
=> Convert.ToInt32(value);
/// <returns>The <typeparamref name="T"/> value.</returns>
public static T Deserialize<T>(string value)
where T : IConvertible
=> (T)Convert.ChangeType(value, typeof(T));
/// <summary>
/// Runs the given test <paramref name="action"/> within an environment
@ -214,12 +216,13 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities
/// where the given <paramref name="intrinsics"/> features.
/// </summary>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
/// <param name="serializable">The value to pass as a parameter to the test action.</param>
public static void RunWithHwIntrinsicsFeature(
/// <param name="intrinsics">The intrinsics features.</param>
public static void RunWithHwIntrinsicsFeature<T>(
Action<string> action,
HwIntrinsics intrinsics,
int serializable)
T serializable,
HwIntrinsics intrinsics)
where T : IConvertible
{
if (!RemoteExecutor.IsSupported)
{

4
tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs

@ -183,7 +183,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests
static void AssertHwIntrinsicsFeatureDisabled(string serializable)
{
Assert.NotNull(serializable);
Assert.NotNull(FeatureTestRunner.Deserialize<FakeSerializable>(serializable));
Assert.NotNull(FeatureTestRunner.DeserializeForXunit<FakeSerializable>(serializable));
#if SUPPORTS_RUNTIME_INTRINSICS
Assert.False(Sse.IsSupported);
@ -202,7 +202,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests
static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic)
{
Assert.NotNull(serializable);
Assert.NotNull(FeatureTestRunner.Deserialize<FakeSerializable>(serializable));
Assert.NotNull(FeatureTestRunner.DeserializeForXunit<FakeSerializable>(serializable));
switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic))
{

Loading…
Cancel
Save