From ff757fb73361e1b2ab02401ccf03f5468f81aca7 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Tue, 23 Oct 2018 11:06:32 +0200 Subject: [PATCH] bitwise conversion + benchmarks WIP --- .../Decoder/JpegImagePostProcessor.cs | 2 +- src/ImageSharp/PixelFormats/PixelConverter.cs | 47 ++++++ .../FrameQuantizerBase{TPixel}.cs | 2 - .../PixelConversion_ConvertFromRgba32.cs | 144 +++++++++++++---- .../General/PixelConversion/TestArgb.cs | 62 ++++---- .../General/PixelConversion/TestRgba.cs | 14 +- .../PixelFormats/PixelConverterTests.cs | 150 ++++++++++++++++++ .../PixelFormats/Rgba32Tests.cs | 1 + 8 files changed, 347 insertions(+), 75 deletions(-) create mode 100644 src/ImageSharp/PixelFormats/PixelConverter.cs create mode 100644 tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegImagePostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegImagePostProcessor.cs index 1a6da2b2b..7ce86b4c9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegImagePostProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegImagePostProcessor.cs @@ -162,7 +162,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder this.colorConverter.ConvertToRgba(values, this.rgbaBuffer.GetSpan()); Span destRow = destination.GetPixelRowSpan(yy); - + // TODO: Investigate if slicing is actually necessary PixelOperations.Instance.FromVector4(this.configuration, this.rgbaBuffer.GetSpan().Slice(0, destRow.Length), destRow); } diff --git a/src/ImageSharp/PixelFormats/PixelConverter.cs b/src/ImageSharp/PixelFormats/PixelConverter.cs new file mode 100644 index 000000000..3686092cd --- /dev/null +++ b/src/ImageSharp/PixelFormats/PixelConverter.cs @@ -0,0 +1,47 @@ +// Copyright (c) Six Labors and contributors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Runtime.CompilerServices; + +namespace SixLabors.ImageSharp.PixelFormats +{ + /// <summary> + /// Contains optimized implementations for conversion between pixel formats. + /// </summary> + /// <remarks> + /// Implementations are based on ideas in: + /// https://github.com/dotnet/coreclr/blob/master/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs#L84 + /// The JIT should be able to detect and optimize ROL and ROR patterns. + /// </remarks> + internal static class PixelConverter + { + public static class Rgba32 + { + /// <summary> + /// Converts a packed <see cref="Rgba32"/> to <see cref="Argb32"/>. + /// </summary> + [MethodImpl(InliningOptions.ShortMethod)] + public static uint ToArgb32(uint packedRgba) + { + // packedRgba = [aa bb gg rr] + // ROL(8, packedRgba): + return (packedRgba << 8) | (packedRgba >> 24); + } + } + + public static class Argb32 + { + /// <summary> + /// Converts a packed <see cref="Argb32"/> to <see cref="Rgba32"/>. + /// </summary> + [MethodImpl(InliningOptions.ShortMethod)] + public static uint ToRgba32(uint packedArgb) + { + // packedArgb = [bb gg rr aa] + // ROR(8, packedArgb): + return (packedArgb >> 8) | (packedArgb << 24); + } + } + } +} \ No newline at end of file diff --git a/src/ImageSharp/Processing/Processors/Quantization/FrameQuantizerBase{TPixel}.cs b/src/ImageSharp/Processing/Processors/Quantization/FrameQuantizerBase{TPixel}.cs index a41127bfc..a8c6c5d7e 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/FrameQuantizerBase{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/FrameQuantizerBase{TPixel}.cs @@ -17,8 +17,6 @@ namespace SixLabors.ImageSharp.Processing.Processors.Quantization public abstract class FrameQuantizerBase : IFrameQuantizer where TPixel : struct, IPixel { - private readonly Configuration configuration; - /// <summary> /// A lookup table for colors /// </summary> diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs index 6a96c8576..1be8347c3 100644 ---
a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs @@ -1,6 +1,8 @@ // ReSharper disable InconsistentNaming +using System; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using BenchmarkDotNet.Attributes; @@ -8,14 +10,14 @@ using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion { - public class PixelConversion_ConvertFromRgba32 + public abstract class PixelConversion_ConvertFromRgba32 { - struct ConversionRunner + internal struct ConversionRunner where T : struct, ITestPixel { - private T[] dest; + public readonly T[] dest; - private Rgba32[] source; + public readonly Rgba32[] source; public ConversionRunner(int count) { @@ -67,72 +69,146 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion } } - private ConversionRunner compatibleMemLayoutRunner; + internal ConversionRunner compatibleMemLayoutRunner; - private ConversionRunner permutedRunner; + internal ConversionRunner permutedRunnerRgbaToArgb; - [Params(32)] + [Params( + 256, + 2048 + )] public int Count { get; set; } [GlobalSetup] public void Setup() { this.compatibleMemLayoutRunner = new ConversionRunner(this.Count); - this.permutedRunner = new ConversionRunner(this.Count); + this.permutedRunnerRgbaToArgb = new ConversionRunner(this.Count); } + } + public class PixelConversion_ConvertFromRgba32_Compatible : PixelConversion_ConvertFromRgba32 + { [Benchmark(Baseline = true)] - public void CompatibleByRef() + public void ByRef() { this.compatibleMemLayoutRunner.RunByRefConversion(); } [Benchmark] - public void CompatibleByVal() + public void ByVal() { this.compatibleMemLayoutRunner.RunByValConversion(); } [Benchmark] - public void CompatibleFromBytes() + public void FromBytes() { this.compatibleMemLayoutRunner.RunFromBytesConversion(); } + [Benchmark] + public void Inline() + { + ref 
Rgba32 sBase = ref this.compatibleMemLayoutRunner.source[0]; + ref Rgba32 dBase = ref Unsafe.As(ref this.compatibleMemLayoutRunner.dest[0]); + + for (int i = 0; i < this.Count; i++) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, i); + } + } + + // Method | Count | Mean | Error | StdDev | Scaled | ScaledSD | + // ---------- |------ |---------:|---------:|---------:|-------:|---------:| + // ByRef | 256 | 128.5 ns | 1.217 ns | 1.138 ns | 1.00 | 0.00 | + // ByVal | 256 | 196.7 ns | 2.792 ns | 2.612 ns | 1.53 | 0.02 | + // FromBytes | 256 | 321.7 ns | 2.180 ns | 1.820 ns | 2.50 | 0.03 | + // Inline | 256 | 129.9 ns | 2.759 ns | 2.581 ns | 1.01 | 0.02 | + } + + public class PixelConversion_ConvertFromRgba32_Permuted_RgbaToArgb : PixelConversion_ConvertFromRgba32 + { + [Benchmark(Baseline = true)] + public void ByRef() + { + this.permutedRunnerRgbaToArgb.RunByRefConversion(); + } + + [Benchmark] + public void ByVal() + { + this.permutedRunnerRgbaToArgb.RunByValConversion(); + } + + [Benchmark] + public void FromBytes() + { + this.permutedRunnerRgbaToArgb.RunFromBytesConversion(); + } [Benchmark] - public void PermutedByRef() + public void InlineShuffle() { - this.permutedRunner.RunByRefConversion(); + ref Rgba32 sBase = ref this.permutedRunnerRgbaToArgb.source[0]; + ref TestArgb dBase = ref this.permutedRunnerRgbaToArgb.dest[0]; + + for (int i = 0; i < this.Count; i++) + { + Rgba32 s = Unsafe.Add(ref sBase, i); + ref TestArgb d = ref Unsafe.Add(ref dBase, i); + + d.R = s.R; + d.G = s.G; + d.B = s.B; + d.A = s.A; + } } [Benchmark] - public void PermutedByVal() + public void PixelConverter_Rgba32_ToArgb32() { - this.permutedRunner.RunByValConversion(); + ref uint sBase = ref Unsafe.As(ref this.permutedRunnerRgbaToArgb.source[0]); + ref uint dBase = ref Unsafe.As(ref this.permutedRunnerRgbaToArgb.dest[0]); + + for (int i = 0; i < this.Count; i++) + { + uint s = Unsafe.Add(ref sBase, i); + Unsafe.Add(ref dBase, i) = PixelConverter.Rgba32.ToArgb32(s); + } } 
[Benchmark] - public void PermutedFromBytes() + public void PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer() { - this.permutedRunner.RunFromBytesConversion(); + Span source = MemoryMarshal.Cast(this.permutedRunnerRgbaToArgb.source); + Span dest = MemoryMarshal.Cast(this.permutedRunnerRgbaToArgb.dest); + source.CopyTo(dest); + + ref uint dBase = ref MemoryMarshal.GetReference(dest); + + for (int i = 0; i < this.Count; i++) + { + uint s = Unsafe.Add(ref dBase, i); + Unsafe.Add(ref dBase, i) = PixelConverter.Rgba32.ToArgb32(s); + } } - } - /* - * Results: - * Method | Count | Mean | StdDev | Scaled | Scaled-StdDev | - * ------------------ |------ |----------- |---------- |------- |-------------- | - * CompatibleByRef | 32 | 20.6339 ns | 0.0742 ns | 1.00 | 0.00 | - * CompatibleByVal | 32 | 23.7425 ns | 0.0997 ns | 1.15 | 0.01 | - * CompatibleFromBytes | 32 | 38.7017 ns | 0.1103 ns | 1.88 | 0.01 | - * PermutedByRef | 32 | 39.2892 ns | 0.1366 ns | 1.90 | 0.01 | - * PermutedByVal | 32 | 38.5178 ns | 0.1946 ns | 1.87 | 0.01 | - * PermutedFromBytes | 32 | 38.6683 ns | 0.0801 ns | 1.87 | 0.01 | - * - * !!! Conclusion !!! - * All memory-incompatible (permuted) variants are equivalent with the the "FromBytes" solution. - * In memory compatible cases we should use the optimized Bulk-copying variant anyways, - * so there is no benefit introducing non-bulk API-s other than FromBytes() OR FromRgba32(). 
- */ + // RESULTS: + // Method | Count | Mean | Error | StdDev | Scaled | ScaledSD | + // ---------------------------------------------------------- |------ |-----------:|-----------:|-----------:|-------:|---------:| + // ByRef | 256 | 328.7 ns | 6.6141 ns | 6.1868 ns | 1.00 | 0.00 | + // ByVal | 256 | 322.0 ns | 4.3541 ns | 4.0728 ns | 0.98 | 0.02 | + // FromBytes | 256 | 321.5 ns | 3.3499 ns | 3.1335 ns | 0.98 | 0.02 | + // InlineShuffle | 256 | 330.7 ns | 4.2525 ns | 3.9778 ns | 1.01 | 0.02 | + // PixelConverter_Rgba32_ToArgb32 | 256 | 167.4 ns | 0.6357 ns | 0.5309 ns | 0.51 | 0.01 | + // PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 256 | 196.6 ns | 0.8929 ns | 0.7915 ns | 0.60 | 0.01 | + // | | | | | | | + // ByRef | 2048 | 2,534.4 ns | 8.2947 ns | 6.9265 ns | 1.00 | 0.00 | + // ByVal | 2048 | 2,638.5 ns | 52.6843 ns | 70.3320 ns | 1.04 | 0.03 | + // FromBytes | 2048 | 2,517.2 ns | 40.8055 ns | 38.1695 ns | 0.99 | 0.01 | + // InlineShuffle | 2048 | 2,546.5 ns | 21.2506 ns | 19.8778 ns | 1.00 | 0.01 | + // PixelConverter_Rgba32_ToArgb32 | 2048 | 1,265.7 ns | 5.1397 ns | 4.5562 ns | 0.50 | 0.00 | + // PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 2048 | 1,410.3 ns | 11.1939 ns | 9.9231 ns | 0.56 | 0.00 |// + } } \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/TestArgb.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/TestArgb.cs index 61a7df81d..76de794ec 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/TestArgb.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/TestArgb.cs @@ -9,81 +9,81 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion [StructLayout(LayoutKind.Sequential)] struct TestArgb : ITestPixel { - private byte a, r, g, b; + public byte A, R, G, B; [MethodImpl(MethodImplOptions.AggressiveInlining)] public void FromRgba32(Rgba32 p) { - this.r = p.R; - this.g = p.G; - this.b = p.B; - this.a = p.A; + this.R = p.R; + this.G = p.G; + 
this.B = p.B; + this.A = p.A; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public void FromRgba32(ref Rgba32 p) { - this.r = p.R; - this.g = p.G; - this.b = p.B; - this.a = p.A; + this.R = p.R; + this.G = p.G; + this.B = p.B; + this.A = p.A; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public void FromBytes(byte r, byte g, byte b, byte a) { - this.r = r; - this.g = g; - this.b = b; - this.a = a; + this.R = r; + this.G = g; + this.B = b; + this.A = a; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public void FromVector4(Vector4 p) { - this.r = (byte)p.X; - this.g = (byte)p.Y; - this.b = (byte)p.Z; - this.a = (byte)p.W; + this.R = (byte)p.X; + this.G = (byte)p.Y; + this.B = (byte)p.Z; + this.A = (byte)p.W; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public void FromVector4(ref Vector4 p) { - this.r = (byte)p.X; - this.g = (byte)p.Y; - this.b = (byte)p.Z; - this.a = (byte)p.W; + this.R = (byte)p.X; + this.G = (byte)p.Y; + this.B = (byte)p.Z; + this.A = (byte)p.W; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public Rgba32 ToRgba32() { - return new Rgba32(this.r, this.g, this.b, this.a); + return new Rgba32(this.R, this.G, this.B, this.A); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyToRgba32(ref Rgba32 dest) { - dest.R = this.r; - dest.G = this.g; - dest.B = this.b; - dest.A = this.a; + dest.R = this.R; + dest.G = this.G; + dest.B = this.B; + dest.A = this.A; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector4 ToVector4() { - return new Vector4(this.r, this.g, this.b, this.a); + return new Vector4(this.R, this.G, this.B, this.A); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyToVector4(ref Vector4 dest) { - dest.X = this.r; - dest.Y = this.g; - dest.Z = this.b; - dest.W = this.a; + dest.X = this.R; + dest.Y = this.G; + dest.Z = this.B; + dest.W = this.A; } } } \ No newline at end of file diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/TestRgba.cs 
b/tests/ImageSharp.Benchmarks/General/PixelConversion/TestRgba.cs index cc8cf352a..36d5f3e5b 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/TestRgba.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/TestRgba.cs @@ -9,7 +9,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion [StructLayout(LayoutKind.Sequential)] struct TestRgba : ITestPixel { - private byte r, g, b, a; + public byte R, G, B, A; [MethodImpl(MethodImplOptions.AggressiveInlining)] public void FromRgba32(Rgba32 source) @@ -26,10 +26,10 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion [MethodImpl(MethodImplOptions.AggressiveInlining)] public void FromBytes(byte r, byte g, byte b, byte a) { - this.r = r; - this.g = g; - this.b = b; - this.a = a; + this.R = r; + this.G = g; + this.B = b; + this.A = a; } public void FromVector4(Vector4 source) @@ -57,13 +57,13 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector4 ToVector4() { - return new Vector4(this.r, this.g, this.b, this.a) * new Vector4(1f / 255f); + return new Vector4(this.R, this.G, this.B, this.A) * new Vector4(1f / 255f); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyToVector4(ref Vector4 dest) { - var tmp = new Vector4(this.r, this.g, this.b, this.a); + var tmp = new Vector4(this.R, this.G, this.B, this.A); tmp *= new Vector4(1f / 255f); dest = tmp; } diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs new file mode 100644 index 000000000..83c4e34f2 --- /dev/null +++ b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs @@ -0,0 +1,150 @@ +using SixLabors.ImageSharp.PixelFormats; + +using Xunit; + +namespace SixLabors.ImageSharp.Tests.PixelFormats +{ + public class PixelConverterTests + { + public static readonly TheoryData RgbaData = + new TheoryData + { + { 0, 0, 0, 0 }, + { 0, 0, 0, 255 }, 
+ { 0, 0, 255, 0 }, + { 0, 255, 0, 0 }, + { 255, 0, 0, 0 }, + { 255, 255, 255, 255 }, + { 0, 0, 0, 1 }, + { 0, 0, 1, 0 }, + { 0, 1, 0, 0 }, + { 1, 0, 0, 0 }, + { 3, 5, 7, 11 }, + { 67, 71, 101, 109 } + }; + + [Theory] + [MemberData(nameof(RgbaData))] + public void Rgba32ToArgb32(byte r, byte g, byte b, byte a) + { + Rgba32 s = ReferenceImplementations.MakeRgba32(r, g, b, a); + + // Act: + uint actualPacked = PixelConverter.Rgba32.ToArgb32(s.PackedValue); + + // Assert: + uint expectedPacked = ReferenceImplementations.ToArgb32(s).PackedValue; + + Assert.Equal(expectedPacked, actualPacked); + } + + [Theory] + [MemberData(nameof(RgbaData))] + public void Argb32ToRgba32(byte r, byte g, byte b, byte a) + { + Argb32 s = ReferenceImplementations.MakeArgb32(r, g, b, a); + + // Act: + uint actualPacked = PixelConverter.Argb32.ToRgba32(s.PackedValue); + + // Assert: + uint expectedPacked = ReferenceImplementations.ToRgba32(s).PackedValue; + + Assert.Equal(expectedPacked, actualPacked); + } + + + private static class ReferenceImplementations + { + public static Rgba32 MakeRgba32(byte r, byte g, byte b, byte a) + { + Rgba32 d = default; + d.R = r; + d.G = g; + d.B = b; + d.A = a; + return d; + } + + public static Argb32 MakeArgb32(byte r, byte g, byte b, byte a) + { + Argb32 d = default; + d.R = r; + d.G = g; + d.B = b; + d.A = a; + return d; + } + + public static Bgra32 MakeBgra32(byte r, byte g, byte b, byte a) + { + Bgra32 d = default; + d.R = r; + d.G = g; + d.B = b; + d.A = a; + return d; + } + + public static Argb32 ToArgb32(Rgba32 s) + { + Argb32 d = default; + d.R = s.R; + d.G = s.G; + d.B = s.B; + d.A = s.A; + return d; + } + + public static Argb32 ToArgb32(Bgra32 s) + { + Argb32 d = default; + d.R = s.R; + d.G = s.G; + d.B = s.B; + d.A = s.A; + return d; + } + + public static Rgba32 ToRgba32(Argb32 s) + { + Rgba32 d = default; + d.R = s.R; + d.G = s.G; + d.B = s.B; + d.A = s.A; + return d; + } + + public static Rgba32 ToRgba32(Bgra32 s) + { + Rgba32 d = default; + 
d.R = s.R; + d.G = s.G; + d.B = s.B; + d.A = s.A; + return d; + } + + public static Bgra32 ToBgra32(Rgba32 s) + { + Bgra32 d = default; + d.R = s.R; + d.G = s.G; + d.B = s.B; + d.A = s.A; + return d; + } + + public static Bgra32 ToBgra32(Argb32 s) + { + Bgra32 d = default; + d.R = s.R; + d.G = s.G; + d.B = s.B; + d.A = s.A; + return d; + } + } + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests/PixelFormats/Rgba32Tests.cs b/tests/ImageSharp.Tests/PixelFormats/Rgba32Tests.cs index 8c702f66d..ad1d13740 100644 --- a/tests/ImageSharp.Tests/PixelFormats/Rgba32Tests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/Rgba32Tests.cs @@ -5,6 +5,7 @@ using System; using System.Numerics; using SixLabors.ImageSharp.PixelFormats; using Xunit; +using Xunit.Abstractions; namespace SixLabors.ImageSharp.Tests.PixelFormats {