From 8f41f1ae7dacfb83feceaf9fd0f1357cdc1b49f0 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Thu, 18 May 2017 04:16:02 +0200 Subject: [PATCH] more benchmarks + removed obsolete TODO note --- src/ImageSharp/PixelFormats/IPixel.cs | 3 - .../PixelConversion_ConvertFromRgba32.cs | 206 ++++++++++++++++++ .../PixelConversion_ConvertFromVector4.cs | 156 +++++++++++++ ....cs => PixelConversion_ConvertToRgba32.cs} | 22 +- 4 files changed, 373 insertions(+), 14 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/General/PixelConversion_ConvertFromRgba32.cs create mode 100644 tests/ImageSharp.Benchmarks/General/PixelConversion_ConvertFromVector4.cs rename tests/ImageSharp.Benchmarks/General/{PixelConversion.cs => PixelConversion_ConvertToRgba32.cs} (86%) diff --git a/src/ImageSharp/PixelFormats/IPixel.cs b/src/ImageSharp/PixelFormats/IPixel.cs index 8e0631b70..cc261e2eb 100644 --- a/src/ImageSharp/PixelFormats/IPixel.cs +++ b/src/ImageSharp/PixelFormats/IPixel.cs @@ -44,9 +44,6 @@ namespace ImageSharp.PixelFormats /// /// Sets the packed representation from the given byte array. /// - /// - /// TODO: Refactor this, defining multiple PackFromAsdf42(ref Asdf42 source) methods instead. Should be faster on many execution paths! - /// /// The x-component. /// The y-component. /// The z-component. 
diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion_ConvertFromRgba32.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion_ConvertFromRgba32.cs new file mode 100644 index 000000000..e096fd828 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion_ConvertFromRgba32.cs @@ -0,0 +1,206 @@ +// ReSharper disable InconsistentNaming +namespace ImageSharp.Benchmarks.General +{ + using System.Runtime.CompilerServices; + using System.Runtime.InteropServices; + + using BenchmarkDotNet.Attributes; + + public class PixelConversion_ConvertFromRgba32 + { + interface ITestPixel + where T : struct, ITestPixel + { + void FromRgba32(Rgba32 source); + + void FromRgba32(ref Rgba32 source); + + void FromBytes(byte r, byte g, byte b, byte a); + } + + [StructLayout(LayoutKind.Sequential)] + struct TestArgb : ITestPixel + { + private byte a, r, g, b; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FromRgba32(Rgba32 p) + { + this.r = p.R; + this.g = p.G; + this.b = p.B; + this.a = p.A; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FromRgba32(ref Rgba32 p) + { + this.r = p.R; + this.g = p.G; + this.b = p.B; + this.a = p.A; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FromBytes(byte r, byte g, byte b, byte a) + { + this.r = r; + this.g = g; + this.b = b; + this.a = a; + } + } + + [StructLayout(LayoutKind.Sequential)] + struct TestRgba : ITestPixel + { + private byte r, g, b, a; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FromRgba32(Rgba32 source) + { + this = Unsafe.As(ref source); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FromRgba32(ref Rgba32 source) + { + this = Unsafe.As(ref source); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FromBytes(byte r, byte g, byte b, byte a) + { + this.r = r; + this.g = g; + this.b = b; + this.a = a; + } + } + + struct ConversionRunner + where T : struct, ITestPixel + { + 
private T[] dest; + + private Rgba32[] source; + + public ConversionRunner(int count) + { + this.dest = new T[count]; + this.source = new Rgba32[count]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void RunByRefConversion() + { + int count = this.dest.Length; + + ref T destBaseRef = ref this.dest[0]; + ref Rgba32 sourceBaseRef = ref this.source[0]; + + for (int i = 0; i < count; i++) + { + Unsafe.Add(ref destBaseRef, i).FromRgba32(ref Unsafe.Add(ref sourceBaseRef, i)); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void RunByValConversion() + { + int count = this.dest.Length; + + ref T destBaseRef = ref this.dest[0]; + ref Rgba32 sourceBaseRef = ref this.source[0]; + + for (int i = 0; i < count; i++) + { + Unsafe.Add(ref destBaseRef, i).FromRgba32(Unsafe.Add(ref sourceBaseRef, i)); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void RunFromBytesConversion() + { + int count = this.dest.Length; + + ref T destBaseRef = ref this.dest[0]; + ref Rgba32 sourceBaseRef = ref this.source[0]; + + for (int i = 0; i < count; i++) + { + ref Rgba32 s = ref Unsafe.Add(ref sourceBaseRef, i); + Unsafe.Add(ref destBaseRef, i).FromBytes(s.R, s.G, s.B, s.A); + } + } + } + + private ConversionRunner compatibleMemLayoutRunner; + + private ConversionRunner permutedRunner; + + [Params(32)] + public int Count { get; set; } + + [Setup] + public void Setup() + { + this.compatibleMemLayoutRunner = new ConversionRunner(this.Count); + this.permutedRunner = new ConversionRunner(this.Count); + } + + [Benchmark(Baseline = true)] + public void CompatibleByRef() + { + this.compatibleMemLayoutRunner.RunByRefConversion(); + } + + [Benchmark] + public void CompatibleByVal() + { + this.compatibleMemLayoutRunner.RunByValConversion(); + } + + [Benchmark] + public void CompatibleFromBytes() + { + this.compatibleMemLayoutRunner.RunFromBytesConversion(); + } + + + [Benchmark] + public void PermutedByRef() + { + 
this.permutedRunner.RunByRefConversion(); + } + + [Benchmark] + public void PermutedByVal() + { + this.permutedRunner.RunByValConversion(); + } + + [Benchmark] + public void PermutedFromBytes() + { + this.permutedRunner.RunFromBytesConversion(); + } + } + + /* + * Results: + * Method | Count | Mean | StdDev | Scaled | Scaled-StdDev | + * ------------------ |------ |----------- |---------- |------- |-------------- | + * CompatibleByRef | 32 | 20.6339 ns | 0.0742 ns | 1.00 | 0.00 | + * CompatibleByVal | 32 | 23.7425 ns | 0.0997 ns | 1.15 | 0.01 | + * CompatibleFromBytes | 32 | 38.7017 ns | 0.1103 ns | 1.88 | 0.01 | + * PermutedByRef | 32 | 39.2892 ns | 0.1366 ns | 1.90 | 0.01 | + * PermutedByVal | 32 | 38.5178 ns | 0.1946 ns | 1.87 | 0.01 | + * PermutedFromBytes | 32 | 38.6683 ns | 0.0801 ns | 1.87 | 0.01 | + * + * !!! Conclusion !!! + * All memory-incompatible (permuted) variants are equivalent to the "FromBytes" solution. + * In memory-compatible cases we should use the optimized bulk-copying variant anyway, + * so there is no benefit in introducing non-bulk APIs other than PackFromBytes() OR PackFromRgba32(). 
+ */ +} \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion_ConvertFromVector4.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion_ConvertFromVector4.cs new file mode 100644 index 000000000..721ac121a --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion_ConvertFromVector4.cs @@ -0,0 +1,156 @@ +// ReSharper disable InconsistentNaming +namespace ImageSharp.Benchmarks.General +{ + using System.Numerics; + using System.Runtime.CompilerServices; + using System.Runtime.InteropServices; + + using BenchmarkDotNet.Attributes; + + public class PixelConversion_ConvertFromVector4 + { + interface ITestPixel + where T : struct, ITestPixel + { + void FromVector4(Vector4 source); + + void FromVector4(ref Vector4 source); + } + + [StructLayout(LayoutKind.Sequential)] + struct TestArgb : ITestPixel + { + private byte a, r, g, b; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FromVector4(Vector4 p) + { + this.r = (byte)p.X; + this.g = (byte)p.Y; + this.b = (byte)p.Z; + this.a = (byte)p.W; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FromVector4(ref Vector4 p) + { + this.r = (byte)p.X; + this.g = (byte)p.Y; + this.b = (byte)p.Z; + this.a = (byte)p.W; + } + } + + [StructLayout(LayoutKind.Sequential)] + struct TestRgbaVector : ITestPixel + { + private Vector4 v; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FromVector4(Vector4 p) + { + this.v = p; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FromVector4(ref Vector4 p) + { + this.v = p; + } + } + + struct ConversionRunner + where T : struct, ITestPixel + { + private T[] dest; + + private Vector4[] source; + + public ConversionRunner(int count) + { + this.dest = new T[count]; + this.source = new Vector4[count]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void RunByRefConversion() + { + int count = this.dest.Length; + + ref T destBaseRef = ref 
this.dest[0]; + ref Vector4 sourceBaseRef = ref this.source[0]; + + for (int i = 0; i < count; i++) + { + Unsafe.Add(ref destBaseRef, i).FromVector4(ref Unsafe.Add(ref sourceBaseRef, i)); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void RunByValConversion() + { + int count = this.dest.Length; + + ref T destBaseRef = ref this.dest[0]; + ref Vector4 sourceBaseRef = ref this.source[0]; + + for (int i = 0; i < count; i++) + { + Unsafe.Add(ref destBaseRef, i).FromVector4(Unsafe.Add(ref sourceBaseRef, i)); + } + } + } + + private ConversionRunner nonVectorRunner; + + private ConversionRunner vectorRunner; + + [Params(32)] + public int Count { get; set; } + + [Setup] + public void Setup() + { + this.nonVectorRunner = new ConversionRunner(this.Count); + this.vectorRunner = new ConversionRunner(this.Count); + } + + [Benchmark(Baseline = true)] + public void VectorByRef() + { + this.vectorRunner.RunByRefConversion(); + } + + [Benchmark] + public void VectorByVal() + { + this.vectorRunner.RunByValConversion(); + } + + [Benchmark] + public void NonVectorByRef() + { + this.nonVectorRunner.RunByRefConversion(); + } + + [Benchmark] + public void NonVectorByVal() + { + this.nonVectorRunner.RunByValConversion(); + } + + } + + /* + * Results: + * Method | Count | Mean | StdDev | Scaled | Scaled-StdDev | + * --------------- |------ |----------- |---------- |------- |-------------- | + * VectorByRef | 32 | 23.6678 ns | 0.1141 ns | 1.00 | 0.00 | + * VectorByVal | 32 | 24.5347 ns | 0.0771 ns | 1.04 | 0.01 | + * NonVectorByRef | 32 | 59.0187 ns | 0.2114 ns | 2.49 | 0.01 | + * NonVectorByVal | 32 | 58.7529 ns | 0.2545 ns | 2.48 | 0.02 | + * + * !!! Conclusion !!! 
+ * We do not need a by-ref version of ConvertFromVector4(): the by-ref and by-val variants perform the same. + */ +} \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion_ConvertToRgba32.cs similarity index 86% rename from tests/ImageSharp.Benchmarks/General/PixelConversion.cs rename to tests/ImageSharp.Benchmarks/General/PixelConversion_ConvertToRgba32.cs index 77e728280..0c9a8af3f 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion_ConvertToRgba32.cs @@ -1,4 +1,5 @@ -namespace ImageSharp.Benchmarks.General +// ReSharper disable InconsistentNaming +namespace ImageSharp.Benchmarks.General { using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -12,7 +13,7 @@ /// 2. void CopyToRgba32(ref Rgba32 dest); /// ? /// - public class PixelConversion + public class PixelConversion_ConvertToRgba32 { interface ITestPixel where T : struct, ITestPixel @@ -103,7 +104,7 @@ } } - private ConversionRunner inOrderRunner; + private ConversionRunner compatibleMemoryLayoutRunner; private ConversionRunner permutedRunner; @@ -113,20 +114,20 @@ [Setup] public void Setup() { - this.inOrderRunner = new ConversionRunner(this.Count); + this.compatibleMemoryLayoutRunner = new ConversionRunner(this.Count); this.permutedRunner = new ConversionRunner(this.Count); } [Benchmark(Baseline = true)] - public void InOrderRetval() + public void CompatibleRetval() { - this.inOrderRunner.RunRetvalConversion(); + this.compatibleMemoryLayoutRunner.RunRetvalConversion(); } [Benchmark] - public void InOrderCopyTo() + public void CompatibleCopyTo() { - this.inOrderRunner.RunCopyToConversion(); + this.compatibleMemoryLayoutRunner.RunCopyToConversion(); } [Benchmark] @@ -147,10 +148,9 @@ * * Method | Count | Mean | StdDev | Scaled | Scaled-StdDev | * --------------- |------ |------------ |---------- |------- |-------------- | - * InOrderRetval | 128 | 89.7358 ns | 
2.2389 ns | 1.00 | 0.00 | - * InOrderCopyTo | 128 | 89.4112 ns | 2.2901 ns | 1.00 | 0.03 | + * CompatibleRetval | 128 | 89.7358 ns | 2.2389 ns | 1.00 | 0.00 | + * CompatibleCopyTo | 128 | 89.4112 ns | 2.2901 ns | 1.00 | 0.03 | * PermutedRetval | 128 | 845.4038 ns | 5.6154 ns | 9.43 | 0.23 | * PermutedCopyTo | 128 | 155.6004 ns | 3.8870 ns | 1.73 | 0.06 | - * */ } \ No newline at end of file