Browse Source

Fix benchmarks, cleanup.

js/color-alpha-handling
James Jackson-South 5 years ago
parent
commit
cdc1c0fce5
  1. 24
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  2. 2
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
  3. 57
      tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs
  4. 60
      tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs
  5. 6
      tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs

24
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -33,15 +33,9 @@ namespace SixLabors.ImageSharp
{
if (Avx.IsSupported || Sse.IsSupported)
{
int remainder;
if (Avx.IsSupported)
{
remainder = ImageMaths.ModuloP2(source.Length, Vector256<float>.Count);
}
else
{
remainder = ImageMaths.ModuloP2(source.Length, Vector128<float>.Count);
}
int remainder = Avx.IsSupported
? ImageMaths.ModuloP2(source.Length, Vector256<float>.Count)
: ImageMaths.ModuloP2(source.Length, Vector128<float>.Count);
int adjustedCount = source.Length - remainder;
@ -73,15 +67,9 @@ namespace SixLabors.ImageSharp
{
if (Avx2.IsSupported || Ssse3.IsSupported)
{
int remainder;
if (Avx2.IsSupported)
{
remainder = ImageMaths.ModuloP2(source.Length, Vector256<byte>.Count);
}
else
{
remainder = ImageMaths.ModuloP2(source.Length, Vector128<byte>.Count);
}
int remainder = Avx2.IsSupported
? ImageMaths.ModuloP2(source.Length, Vector256<byte>.Count)
: ImageMaths.ModuloP2(source.Length, Vector128<byte>.Count);
int adjustedCount = source.Length - remainder;

2
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@ -25,8 +25,6 @@ namespace SixLabors.ImageSharp
{
VerifyShuffleSpanInput(source, dest);
// TODO: There doesn't seem to be any APIs for
// System.Numerics that allow shuffling.
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control);
#endif

57
tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs

@ -30,39 +30,38 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
}
}
// 2020-10-26
// 2020-10-29
// ##########
//
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
// Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK = 5.0.100-rc.2.20479.15
//
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------- |---------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:|
// | Shuffle4Channel | AVX | Empty | 128 | 20.51 ns | 0.270 ns | 0.211 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.00 ns | 0.991 ns | 0.927 ns | 3.08 | 0.06 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 17.25 ns | 0.066 ns | 0.058 ns | 0.84 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 256 | 24.57 ns | 0.248 ns | 0.219 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 124.55 ns | 2.501 ns | 2.456 ns | 5.06 | 0.10 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 21.80 ns | 0.094 ns | 0.088 ns | 0.89 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 512 | 28.51 ns | 0.130 ns | 0.115 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 256.52 ns | 1.424 ns | 1.332 ns | 9.00 | 0.07 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 29.72 ns | 0.217 ns | 0.203 ns | 1.04 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 1024 | 36.40 ns | 0.357 ns | 0.334 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 492.71 ns | 1.498 ns | 1.251 ns | 13.52 | 0.12 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 44.71 ns | 0.264 ns | 0.234 ns | 1.23 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 2048 | 59.38 ns | 0.180 ns | 0.159 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 975.05 ns | 2.043 ns | 1.811 ns | 16.42 | 0.05 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 81.83 ns | 0.212 ns | 0.198 ns | 1.38 | 0.01 | - | - | - | - |
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - |
}

60
tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs

@ -10,6 +10,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class ShuffleFloat4Channel
{
private static readonly byte control = default(WXYZShuffle4).Control;
private float[] source;
private float[] destination;
@ -26,43 +27,42 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Benchmark]
public void Shuffle4Channel()
{
SimdUtils.Shuffle4Channel(this.source, this.destination, default(WXYZShuffle4).Control);
SimdUtils.Shuffle4Channel(this.source, this.destination, control);
}
}
// 2020-10-26
// 2020-10-29
// ##########
//
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
// Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK = 5.0.100-rc.2.20479.15
//
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------- |---------------- |-------------------------------------------------- |------ |------------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:|
// | Shuffle4Channel | AVX | Empty | 128 | 14.49 ns | 0.244 ns | 0.217 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 87.74 ns | 0.524 ns | 0.490 ns | 6.06 | 0.09 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 23.65 ns | 0.101 ns | 0.094 ns | 1.63 | 0.03 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 256 | 25.87 ns | 0.492 ns | 0.673 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 159.52 ns | 0.901 ns | 0.843 ns | 6.12 | 0.12 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 45.47 ns | 0.404 ns | 0.378 ns | 1.75 | 0.03 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 512 | 49.51 ns | 0.088 ns | 0.083 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 297.96 ns | 0.926 ns | 0.821 ns | 6.02 | 0.02 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 90.77 ns | 0.191 ns | 0.169 ns | 1.83 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 1024 | 113.09 ns | 1.913 ns | 3.090 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 604.58 ns | 1.464 ns | 1.298 ns | 5.29 | 0.18 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 179.44 ns | 0.208 ns | 0.184 ns | 1.57 | 0.05 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | AVX | Empty | 2048 | 217.95 ns | 1.314 ns | 1.165 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 1,152.04 ns | 3.941 ns | 3.494 ns | 5.29 | 0.03 | - | - | - | - |
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 349.52 ns | 0.587 ns | 0.520 ns | 1.60 | 0.01 | - | - | - | - |
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.647 ns | 0.5475 ns | 0.4853 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 128 | 9.818 ns | 0.1457 ns | 0.1292 ns | 0.15 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 15.267 ns | 0.1005 ns | 0.0940 ns | 0.24 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 125.586 ns | 1.9312 ns | 1.8064 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 256 | 15.878 ns | 0.1983 ns | 0.1758 ns | 0.13 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 29.170 ns | 0.2925 ns | 0.2442 ns | 0.23 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 263.859 ns | 2.6660 ns | 2.3634 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 512 | 29.452 ns | 0.3334 ns | 0.3118 ns | 0.11 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 52.912 ns | 0.1932 ns | 0.1713 ns | 0.20 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 495.717 ns | 1.9850 ns | 1.8567 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 1024 | 53.757 ns | 0.3212 ns | 0.2847 ns | 0.11 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 107.815 ns | 1.6201 ns | 1.3528 ns | 0.22 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - |
}

6
tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs

@ -62,20 +62,20 @@ namespace SixLabors.ImageSharp.Benchmarks
.WithEnvironmentVariables(
new EnvironmentVariable(EnableHWIntrinsic, Off),
new EnvironmentVariable(FeatureSIMD, Off))
.WithId("No HwIntrinsics"));
.WithId("1. No HwIntrinsics").AsBaseline());
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31)
.WithId("AVX").AsBaseline());
.WithId("2. AVX"));
}
if (Sse.IsSupported)
{
this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31)
.WithEnvironmentVariables(new EnvironmentVariable(EnableAVX, Off))
.WithId("SSE"));
.WithId("3. SSE"));
}
#endif
}

Loading…
Cancel
Save