From 7f113ab00a03bca62b48499db366d17f68809f4d Mon Sep 17 00:00:00 2001
From: Anton Firszov
Date: Thu, 1 Nov 2018 16:00:17 +0100
Subject: [PATCH] AVX2 optimized Block8x8 -> Block8x8F conversion

---
Review notes (free text in this position is ignored by git am):
 * The InliningOptions.cs (#define PROFILING) and ProfilingBenchmarks.cs
   (#if true) hunks were local profiling switches and are not part of this
   patch any more.
 * The debug guard message and the two new Block8x8F tests now use the real
   method names (Int16, not UInt16); the scalar test no longer skips on
   runners without AVX2.
 * Indentation, generic type arguments and XML doc tags were reconstructed;
   verify with `git apply --check` before applying.

 src/ImageSharp/Common/Helpers/DebugGuard.cs   | 14 ++++++
 .../Helpers/SimdUtils.ExtendedIntrinsics.cs   | 14 ++++++
 .../Jpeg/Components/Block8x8F.Generated.cs    |  2 +-
 .../Jpeg/Components/Block8x8F.Generated.tt    |  2 +-
 .../Formats/Jpeg/Components/Block8x8F.cs      | 49 +++++++++++++++++++
 .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 21 ++++++++
 .../Formats/Jpg/Block8x8FTests.cs             | 37 ++++++++++++++
 .../Formats/Jpg/JpegProfilingBenchmarks.cs    |  4 +-
 .../Formats/Jpg/Utils/JpegFixture.cs          |  7 ++-
 .../TestUtilities/TestDataGenerator.cs        | 11 +++++
 10 files changed, 156 insertions(+), 5 deletions(-)

diff --git a/src/ImageSharp/Common/Helpers/DebugGuard.cs b/src/ImageSharp/Common/Helpers/DebugGuard.cs
index 2cf18b245..43eebeac8 100644
--- a/src/ImageSharp/Common/Helpers/DebugGuard.cs
+++ b/src/ImageSharp/Common/Helpers/DebugGuard.cs
@@ -163,6 +163,20 @@ namespace SixLabors.ImageSharp
             }
         }
 
+        /// <summary>
+        /// Verifies whether a specific condition is met, throwing an exception if it's false.
+        /// </summary>
+        /// <param name="target">The condition</param>
+        /// <param name="message">The error message</param>
+        [Conditional("DEBUG")]
+        public static void IsTrue(bool target, string message)
+        {
+            if (!target)
+            {
+                throw new InvalidOperationException(message);
+            }
+        }
+
         /// <summary>
         /// Verifies, that the method parameter with specified target value is false
         /// and throws an exception if it is found to be so.
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
index 2ac577264..463961d86 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
@@ -25,6 +25,20 @@ namespace SixLabors.ImageSharp
                 false;
 #endif
 
+            /// <summary>
+            /// Widen and convert a vector of <see cref="short"/> values into 2 vectors of <see cref="float"/>-s.
+            /// </summary>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            internal static void ConvertToSingle(
+                Vector<short> source,
+                out Vector<float> dest1,
+                out Vector<float> dest2)
+            {
+                Vector.Widen(source, out Vector<int> i1, out Vector<int> i2);
+                dest1 = Vector.ConvertToSingle(i1);
+                dest2 = Vector.ConvertToSingle(i2);
+            }
+
             /// <summary>
             /// <see cref="BulkConvertByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
             /// </summary>
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs
index 53f29734c..09ed6408d 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs
@@ -154,7 +154,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
         /// <summary>
         /// Fill the block from 'source' doing short -> float conversion.
         /// </summary>
-        public void LoadFrom(ref Block8x8 source)
+        public void LoadFromInt16Scalar(ref Block8x8 source)
         {
             ref short selfRef = ref Unsafe.As<Block8x8, short>(ref source);
 
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt
index 76c61f6c3..f93ee6522 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt
@@ -104,7 +104,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
         /// <summary>
         /// Fill the block from 'source' doing short -> float conversion.
         /// </summary>
-        public void LoadFrom(ref Block8x8 source)
+        public void LoadFromInt16Scalar(ref Block8x8 source)
         {
             ref short selfRef = ref Unsafe.As<Block8x8, short>(ref source);
 
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
@@ -493,6 +493,55 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
             }
         }
 
+        /// <summary>
+        /// Loads values from <paramref name="source"/> doing short -> float conversion, using the AVX2 path when available.
+        /// </summary>
+        /// <param name="source">The source <see cref="Block8x8"/></param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void LoadFrom(ref Block8x8 source)
+        {
+#if SUPPORTS_EXTENDED_INTRINSICS
+            if (SimdUtils.IsAvx2CompatibleArchitecture)
+            {
+                this.LoadFromInt16ExtendedAvx2(ref source);
+                return;
+            }
+#endif
+            this.LoadFromInt16Scalar(ref source);
+        }
+
+        /// <summary>
+        /// Loads values from <paramref name="source"/> using extended AVX2 intrinsics.
+        /// </summary>
+        /// <param name="source">The source <see cref="Block8x8"/></param>
+        public void LoadFromInt16ExtendedAvx2(ref Block8x8 source)
+        {
+            DebugGuard.IsTrue(
+                SimdUtils.IsAvx2CompatibleArchitecture,
+                "LoadFromInt16ExtendedAvx2 only works on AVX2 compatible architecture!");
+
+            ref Vector<short> sRef = ref Unsafe.As<Block8x8, Vector<short>>(ref source);
+            ref Vector<float> dRef = ref Unsafe.As<Block8x8F, Vector<float>>(ref this);
+
+            // Vector<short>.Count == 16 on AVX2
+            // We can process 2 block rows in a single step
+            SimdUtils.ExtendedIntrinsics.ConvertToSingle(sRef, out Vector<float> top, out Vector<float> bottom);
+            dRef = top;
+            Unsafe.Add(ref dRef, 1) = bottom;
+
+            SimdUtils.ExtendedIntrinsics.ConvertToSingle(Unsafe.Add(ref sRef, 1), out top, out bottom);
+            Unsafe.Add(ref dRef, 2) = top;
+            Unsafe.Add(ref dRef, 3) = bottom;
+
+            SimdUtils.ExtendedIntrinsics.ConvertToSingle(Unsafe.Add(ref sRef, 2), out top, out bottom);
+            Unsafe.Add(ref dRef, 4) = top;
+            Unsafe.Add(ref dRef, 5) = bottom;
+
+            SimdUtils.ExtendedIntrinsics.ConvertToSingle(Unsafe.Add(ref sRef, 3), out top, out bottom);
+            Unsafe.Add(ref dRef, 6) = top;
+            Unsafe.Add(ref dRef, 7) = bottom;
+        }
+
         /// <inheritdoc />
         public override string ToString()
         {
diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
index c63cb3438..4f8a2cdaf 100644
--- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
+++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
@@ -257,6 +257,27 @@ namespace SixLabors.ImageSharp.Tests.Common
                 );
         }
 
+        [Theory]
+        [InlineData(1234)]
+        public void ExtendedIntrinsics_ConvertToSingle(short scale)
+        {
+            int n = Vector<float>.Count;
+            short[] sData = new Random(scale).GenerateRandomInt16Array(2 * n, (short)-scale, scale);
+            float[] fData = sData.Select(u => (float)u).ToArray();
+
+            var source = new Vector<short>(sData);
+
+            var expected1 = new Vector<float>(fData, 0);
+            var expected2 = new Vector<float>(fData, n);
+
+            // Act:
+            SimdUtils.ExtendedIntrinsics.ConvertToSingle(source, out Vector<float> actual1, out Vector<float> actual2);
+
+            // Assert:
+            Assert.Equal(expected1, actual1);
+            Assert.Equal(expected2, actual2);
+        }
+
         [Theory]
         [MemberData(nameof(ArbitraryArraySizes))]
         public void BulkConvertNormalizedFloatToByteClampOverflows(int count)
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
--- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
@@ -408,5 +408,42 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
                 Assert.Equal(original[i] * 42f, actual[i]);
             }
         }
+
+        [Fact]
+        public void LoadFromInt16Scalar()
+        {
+            short[] data = Create8x8ShortData();
+
+            var source = new Block8x8(data);
+
+            Block8x8F dest = default;
+            dest.LoadFromInt16Scalar(ref source);
+
+            for (int i = 0; i < Block8x8F.Size; i++)
+            {
+                Assert.Equal((float)data[i], dest[i]);
+            }
+        }
+
+        [Fact]
+        public void LoadFromInt16ExtendedAvx2()
+        {
+            if (this.SkipOnNonAvx2Runner())
+            {
+                return;
+            }
+
+            short[] data = Create8x8ShortData();
+
+            var source = new Block8x8(data);
+
+            Block8x8F dest = default;
+            dest.LoadFromInt16ExtendedAvx2(ref source);
+
+            for (int i = 0; i < Block8x8F.Size; i++)
+            {
+                Assert.Equal((float)data[i], dest[i]);
+            }
+        }
     }
 }
\ No newline at end of file
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegProfilingBenchmarks.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegProfilingBenchmarks.cs
index f60f478e1..7d5130e1b 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/JpegProfilingBenchmarks.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegProfilingBenchmarks.cs
@@ -32,8 +32,8 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
             TestImages.Jpeg.Baseline.Jpeg444,
         };
 
-        [Theory] // Benchmark, enable manually
-        [MemberData(nameof(DecodeJpegData))]
+        //[Theory] // Benchmark, enable manually
+        //[MemberData(nameof(DecodeJpegData))]
         public void DecodeJpeg(string fileName)
         {
             this.DecodeJpegBenchmarkImpl(fileName, new JpegDecoder());
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs b/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs
index d14fbc3fc..89fdd5745 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs
@@ -58,7 +58,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
             {
                 for (int j = 0; j < 8; j++)
                 {
-                    result[i * 8 + j] = (short)(i * 10 + j);
+                    short val = (short)(i * 10 + j);
+                    if ((i + j) % 2 == 0)
+                    {
+                        val *= -1;
+                    }
+                    result[i * 8 + j] = val;
                 }
             }
             return result;
diff --git a/tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs b/tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs
index 56cde41fc..e3d8bf380 100644
--- a/tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs
+++ b/tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs
@@ -88,6 +88,17 @@ namespace SixLabors.ImageSharp.Tests
             return values;
         }
 
+        public static short[] GenerateRandomInt16Array(this Random rnd, int length, short minVal, short maxVal)
+        {
+            short[] values = new short[length];
+            for (int i = 0; i < values.Length; i++)
+            {
+                values[i] = (short)rnd.Next(minVal, maxVal);
+            }
+
+            return values;
+        }
+
         private static float GetRandomFloat(this Random rnd, float minVal, float maxVal) => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
     }
 }
\ No newline at end of file