Browse Source

disappointing benchmark results

af/merge-core
Anton Firszov 8 years ago
parent
commit
4ab0731425
  1. 5
      src/ImageSharp/Common/Extensions/SimdUtils.ExtendedIntrinsics.cs
  2. 48
      tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs
  3. 6
      tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs

5
src/ImageSharp/Common/Extensions/SimdUtils.ExtendedIntrinsics.cs

@ -66,6 +66,10 @@ namespace SixLabors.ImageSharp
/// <summary>
/// A variant of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime.
/// </summary>
/// <remarks>
/// It is NOT worth utilizing this method yet (2018 Oct).
/// See benchmark results for the "PackFromVector4_Rgba32" benchmark!
/// </remarks>
// ReSharper disable once MemberHidesStaticFromOuterClass
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
{
@ -107,7 +111,6 @@ namespace SixLabors.ImageSharp
Unsafe.Add(ref destBase, i) = b;
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]

48
tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs

@ -3,6 +3,7 @@
// ReSharper disable InconsistentNaming
using System;
using System.Buffers;
using System.Numerics;
using System.Runtime.CompilerServices;
@ -19,9 +20,9 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
public abstract class PackFromVector4<TPixel>
where TPixel : struct, IPixel<TPixel>
{
private IMemoryOwner<Vector4> source;
protected IMemoryOwner<Vector4> source;
private IMemoryOwner<TPixel> destination;
protected IMemoryOwner<TPixel> destination;
[Params(
//64,
@ -42,7 +43,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
this.source.Dispose();
}
[Benchmark(Baseline = true)]
[Benchmark]
public void PerElement()
{
ref Vector4 s = ref MemoryMarshal.GetReference(this.source.GetSpan());
@ -54,7 +55,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
}
}
[Benchmark]
[Benchmark(Baseline = true)]
public void CommonBulk()
{
new PixelOperations<TPixel>().PackFromVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
@ -69,6 +70,45 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
public class PackFromVector4_Rgba32 : PackFromVector4<Rgba32>
{
/// <summary>
/// Measures <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> directly:
/// the <see cref="Vector4"/> source buffer is reinterpreted as floats and the
/// <see cref="Rgba32"/> destination buffer as bytes, without going through pixel operations.
/// </summary>
/// <remarks>
/// Currently excluded from benchmark runs (the attribute below is commented out).
/// </remarks>
//[Benchmark]
public void BulkConvertNormalizedFloatToByteClampOverflows()
{
    // Local names reflect the element type each span actually holds:
    // the source is float data, the destination is byte data.
    Span<float> sFloats = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
    Span<byte> dBytes = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());

    SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(sFloats, dBytes);
}
/// <summary>
/// Measures <see cref="SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows"/> directly:
/// the <see cref="Vector4"/> source buffer is reinterpreted as floats and the
/// <see cref="Rgba32"/> destination buffer as bytes, without going through pixel operations.
/// </summary>
[Benchmark]
public void ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows()
{
    // Local names reflect the element type each span actually holds:
    // the source is float data, the destination is byte data.
    Span<float> sFloats = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
    Span<byte> dBytes = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());

    SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(sFloats, dBytes);
}
// RESULTS:
// BenchmarkDotNet=v0.10.14, OS=Windows 10.0.17134
// Intel Core i7-7700HQ CPU 2.80GHz (Kaby Lake), 1 CPU, 8 logical and 4 physical cores
// Frequency=2742187 Hz, Resolution=364.6724 ns, Timer=TSC
// .NET Core SDK=2.1.400-preview-009063
// [Host] : .NET Core 2.1.1 (CoreCLR 4.6.26606.02, CoreFX 4.6.26606.05), 64bit RyuJIT
// Job-XIFINS : .NET Framework 4.7.1 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3190.0
// Job-RTQZPN : .NET Core 2.1.1 (CoreCLR 4.6.26606.02, CoreFX 4.6.26606.05), 64bit RyuJIT
//
// LaunchCount=1 TargetCount=3 WarmupCount=3
//
// Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Allocated |
// ----------------------------------------------------------------- |-------- |------ |----------:|-----------:|----------:|-------:|---------:|----------:|
// ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 2048 | 3.755 us | 0.8959 us | 0.0506 us | 0.22 | 0.00 | 0 B |
// PerElement | Clr | 2048 | 17.387 us | 15.1569 us | 0.8564 us | 1.02 | 0.04 | 0 B |
// CommonBulk | Clr | 2048 | 17.121 us | 0.7634 us | 0.0431 us | 1.00 | 0.00 | 24 B |
// OptimizedBulk | Clr | 2048 | 4.018 us | 0.3858 us | 0.0218 us | 0.23 | 0.00 | 0 B |
// | | | | | | | | |
// ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Core | 2048 | 22.232 us | 1.6154 us | 0.0913 us | 1.31 | 0.04 | 0 B |
// PerElement | Core | 2048 | 16.741 us | 2.9254 us | 0.1653 us | 0.98 | 0.03 | 0 B |
// CommonBulk | Core | 2048 | 17.022 us | 11.4894 us | 0.6492 us | 1.00 | 0.00 | 24 B |
// OptimizedBulk | Core | 2048 | 3.707 us | 0.1500 us | 0.0085 us | 0.22 | 0.01 | 0 B |
}
}

6
tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs

@ -79,7 +79,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
}
//[Benchmark]
[Benchmark]
public void BulkConvertByteToNormalizedFloat()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
@ -89,12 +89,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
}
[Benchmark]
public void BulkConvertByteToNormalizedFloatFast()
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
SimdUtils.BulkConvertByteToNormalizedFloatWithExtendedIntrinsics(sBytes, dFloats);
SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(sBytes, dFloats);
}
}

Loading…
Cancel
Save