From af7d96d21462e6e080488f7d1933d1430834fadd Mon Sep 17 00:00:00 2001
From: Anton Firszov
Date: Mon, 15 Oct 2018 01:11:59 +0200
Subject: [PATCH] SIMD byte -> float conversion: BulkConvertByteToNormalizedFloatFast

---
Notes (this section is ignored by git am):
- BulkConvertByteToNormalizedFloatFast validates dest.Length before writing
  through unchecked references, and its guard message names the span that is
  actually checked (source).
- NOTE(review): line breaks and context-line whitespace were restored from a
  flattened copy of this patch; if a hunk fails to match, retry with
  `git apply --ignore-whitespace`. The post-image blob id on the SimdUtils.cs
  index line predates the guard changes.

 src/ImageSharp/Common/Extensions/SimdUtils.cs | 53 ++++++++++++++++++--
 .../Color/Bulk/ToVector4.cs                   | 16 +++++--
 .../ImageSharp.Benchmarks.csproj              |  2 +-
 .../PixelFormats/PixelOperationsTests.cs      | 24 ++++++++++
 .../Tests/TestEnvironmentTests.cs             |  2 +
 5 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/src/ImageSharp/Common/Extensions/SimdUtils.cs b/src/ImageSharp/Common/Extensions/SimdUtils.cs
index db1e80dda..56118a764 100644
--- a/src/ImageSharp/Common/Extensions/SimdUtils.cs
+++ b/src/ImageSharp/Common/Extensions/SimdUtils.cs
@@ -105,13 +105,58 @@ namespace SixLabors.ImageSharp
             }
         }
 
-        internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
+        /// <summary>
+        /// Fast <see cref="byte"/> -> <see cref="float"/> conversion for RyuJIT runtimes having dotnet/coreclr#10662 merged.
+        /// See: https://github.com/dotnet/coreclr/pull/10662
+        /// </summary>
+        /// <param name="source">The bytes to convert. The length must be a multiple of <c>Vector&lt;byte&gt;.Count</c>.</param>
+        /// <param name="dest">Receives one float per source byte, scaled by 1/255. Must be at least as long as <paramref name="source"/>.</param>
+        internal static void BulkConvertByteToNormalizedFloatFast(ReadOnlySpan<byte> source, Span<float> dest)
         {
-            if (!Vector.IsHardwareAccelerated)
+            Guard.IsTrue(
+                source.Length % Vector<byte>.Count == 0,
+                nameof(source),
+                "source.Length should be divisible by Vector<byte>.Count!");
+
+            // The loop below writes through unchecked references, so dest has to be validated up front.
+            Guard.IsTrue(
+                dest.Length >= source.Length,
+                nameof(dest),
+                "dest.Length should be at least source.Length!");
+
+            int n = source.Length / Vector<byte>.Count;
+
+            ref Vector<byte> sourceBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(source));
+            ref Vector<float> destBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dest));
+
+            var scale = new Vector<float>(1f / 255f);
+
+            for (int i = 0; i < n; i++)
             {
-                throw new InvalidOperationException(
-                    "Rgba32.PixelOperations.ToVector4SimdAligned() should not be called when Vector.IsHardwareAccelerated == false!");
+                Vector<byte> b = Unsafe.Add(ref sourceBase, i);
+
+                // byte -> ushort -> uint: one Vector<byte> widens into four Vector<uint>.
+                Vector.Widen(b, out Vector<ushort> s0, out Vector<ushort> s1);
+                Vector.Widen(s0, out Vector<uint> w0, out Vector<uint> w1);
+                Vector.Widen(s1, out Vector<uint> w2, out Vector<uint> w3);
+
+                Vector<float> f0 = Vector.ConvertToSingle(w0) * scale;
+                Vector<float> f1 = Vector.ConvertToSingle(w1) * scale;
+                Vector<float> f2 = Vector.ConvertToSingle(w2) * scale;
+                Vector<float> f3 = Vector.ConvertToSingle(w3) * scale;
+
+                // Each source vector yields four consecutive destination vectors.
+                ref Vector<float> d = ref Unsafe.Add(ref destBase, i * 4);
+                d = f0;
+                Unsafe.Add(ref d, 1) = f1;
+                Unsafe.Add(ref d, 2) = f2;
+                Unsafe.Add(ref d, 3) = f3;
             }
+        }
+
+        internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
+        {
+            GuardAvx2(nameof(BulkConvertByteToNormalizedFloat));
 
             DebugGuard.IsTrue((dest.Length % Vector<byte>.Count) == 0, nameof(source), "dest.Length should be divisable by Vector<byte>.Count!");
 
diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
index 7b6f902d8..0e5e9d94f 100644
--- a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
@@ -28,7 +28,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
 
         [Params(
             //64,
-            1024)]
+            2048)]
         public int Count { get; set; }
 
         [GlobalSetup]
@@ -72,14 +72,14 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
     }
 
     [CoreJob]
-    //[ClrJob]
+    [ClrJob]
     public class ToVector4_Rgba32 : ToVector4<Rgba32>
     {
         class Config : ManualConfig
         {
         }
 
-        [Benchmark]
+        //[Benchmark]
         public void BulkConvertByteToNormalizedFloat()
         {
             Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
@@ -87,5 +87,15 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
 
             SimdUtils.BulkConvertByteToNormalizedFloat(sBytes, dFloats);
         }
+
+        [Benchmark]
+        public void BulkConvertByteToNormalizedFloatFast()
+        {
+            Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
+            Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
+
+            SimdUtils.BulkConvertByteToNormalizedFloatFast(sBytes, dFloats);
+        }
+
     }
 }
\ No newline at end of file
diff --git a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj
index 36b7d4db4..e470e7821 100644
--- 
a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj +++ b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj @@ -1,6 +1,6 @@  - netcoreapp2.0;net461 + netcoreapp2.1;net461 Exe True SixLabors.ImageSharp.Benchmarks diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs index a96da03e7..2e84886c0 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs @@ -73,6 +73,30 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats ); } + [Fact] + public void BulkConvertByteToNormalizedFloatFast() + { + if (!Vector.IsHardwareAccelerated) + { + return; + } + + ImageSharp.PixelFormats.Rgba32[] source = CreatePixelTestData(128); + Vector4[] expected = CreateExpectedVector4Data(source); + + TestOperation( + source, + expected, + (s, d) => + { + ReadOnlySpan sBytes = MemoryMarshal.Cast(s); + Span dFloats = MemoryMarshal.Cast(d.Memory.Span); + + SimdUtils.BulkConvertByteToNormalizedFloatFast(sBytes, dFloats); + } + ); + } + // [Fact] // Profiling benchmark - enable manually! #pragma warning disable xUnit1013 // Public method should be marked as test diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs index 8a3e69059..30bb16c2a 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs @@ -3,6 +3,8 @@ using System; using System.IO; +using System.Reflection; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.Formats;