From 3e73bd8cf1b75d28000c934d9af85136fa16c3ed Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Wed, 13 Sep 2017 02:52:15 +0200 Subject: [PATCH] speedup Block8x8F.RoundInplace() --- src/ImageSharp/Common/Extensions/SimdUtils.cs | 45 +++++++ .../Common/Extensions/Vector4Extensions.cs | 11 -- .../Formats/Jpeg/Common/Block8x8F.cs | 47 ++++++- ...sionBlocks.cs => Block8x8F_DivideRound.cs} | 3 +- .../General/Block8x8F_Round.cs | 67 ++++++++++ .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 119 ++++++++++++++++++ .../Formats/Jpg/Block8x8FTests.cs | 23 ++++ .../Formats/Jpg/JpegColorConverterTests.cs | 30 ----- 8 files changed, 302 insertions(+), 43 deletions(-) create mode 100644 src/ImageSharp/Common/Extensions/SimdUtils.cs rename tests/ImageSharp.Benchmarks/General/{RoundSinglePrecisionBlocks.cs => Block8x8F_DivideRound.cs} (98%) create mode 100644 tests/ImageSharp.Benchmarks/General/Block8x8F_Round.cs create mode 100644 tests/ImageSharp.Tests/Common/SimdUtilsTests.cs diff --git a/src/ImageSharp/Common/Extensions/SimdUtils.cs b/src/ImageSharp/Common/Extensions/SimdUtils.cs new file mode 100644 index 0000000000..5972000728 --- /dev/null +++ b/src/ImageSharp/Common/Extensions/SimdUtils.cs @@ -0,0 +1,45 @@ +// Copyright (c) Six Labors and contributors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace SixLabors.ImageSharp +{ + /// + /// Various extension and utility methods for and utilizing SIMD capabilities + /// + internal static class SimdUtils + { + /// + /// Transform all scalars in 'v' in a way that converting them to would have rounding semantics. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector4 PseudoRound(this Vector4 v) + { + var sign = Vector4.Clamp(v, new Vector4(-1), new Vector4(1)); + + return v + (sign * 0.5f); + } + + /// + /// Rounds all values in 'v' to the nearest integer following semantics. 
+ /// Source: + /// + /// https://github.com/tmpvar/voxviz/blob/master/deps/glm/glm/simd/common.h#L110 + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector FastRound(this Vector x) + { + Vector magic0 = new Vector(-2147483648); // 0x80000000 + Vector sgn0 = Vector.AsVectorSingle(magic0); + Vector and0 = Vector.BitwiseAnd(sgn0, x); + Vector or0 = Vector.BitwiseOr(and0, new Vector(8388608.0f)); + Vector add0 = Vector.Add(x, or0); + Vector sub0 = Vector.Subtract(add0, or0); + return sub0; + } + } +} \ No newline at end of file diff --git a/src/ImageSharp/Common/Extensions/Vector4Extensions.cs b/src/ImageSharp/Common/Extensions/Vector4Extensions.cs index 1809dd329b..5fbc3960a3 100644 --- a/src/ImageSharp/Common/Extensions/Vector4Extensions.cs +++ b/src/ImageSharp/Common/Extensions/Vector4Extensions.cs @@ -79,16 +79,5 @@ namespace SixLabors.ImageSharp return MathF.Pow((signal + 0.055F) / 1.055F, 2.4F); } - - /// - /// Transform all scalars in 'v' in a way that converting them to would have rounding semantics. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector4 PseudoRound(this Vector4 v) - { - var sign = Vector4.Clamp(v, new Vector4(-1), new Vector4(1)); - - return v + (sign * 0.5f); - } } } diff --git a/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs index 474c75adf1..045a1f527a 100644 --- a/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs @@ -6,6 +6,7 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Text; using SixLabors.ImageSharp.Memory; // ReSharper disable InconsistentNaming @@ -609,8 +610,34 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common return result; } - // TODO: Optimize this! 
public void RoundInplace() + { + if (Vector.Count == 8) + { + ref Vector row0 = ref Unsafe.As>(ref this.V0L); + row0 = row0.FastRound(); + ref Vector row1 = ref Unsafe.As>(ref this.V1L); + row1 = row1.FastRound(); + ref Vector row2 = ref Unsafe.As>(ref this.V2L); + row2 = row2.FastRound(); + ref Vector row3 = ref Unsafe.As>(ref this.V3L); + row3 = row3.FastRound(); + ref Vector row4 = ref Unsafe.As>(ref this.V4L); + row4 = row4.FastRound(); + ref Vector row5 = ref Unsafe.As>(ref this.V5L); + row5 = row5.FastRound(); + ref Vector row6 = ref Unsafe.As>(ref this.V6L); + row6 = row6.FastRound(); + ref Vector row7 = ref Unsafe.As>(ref this.V7L); + row7 = row7.FastRound(); + } + else + { + this.RoundInplaceSlow(); + } + } + + private void RoundInplaceSlow() { for (int i = 0; i < Size; i++) { @@ -618,6 +645,24 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common } } + /// + public override string ToString() + { + var bld = new StringBuilder(); + bld.Append('['); + for (int i = 0; i < Size; i++) + { + bld.Append(this[i]); + if (i < Size - 1) + { + bld.Append(','); + } + } + + bld.Append(']'); + return bld.ToString(); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor) { diff --git a/tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs b/tests/ImageSharp.Benchmarks/General/Block8x8F_DivideRound.cs similarity index 98% rename from tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs rename to tests/ImageSharp.Benchmarks/General/Block8x8F_DivideRound.cs index 044e973a90..bad87cc11a 100644 --- a/tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs +++ b/tests/ImageSharp.Benchmarks/General/Block8x8F_DivideRound.cs @@ -6,6 +6,7 @@ using System.Runtime.CompilerServices; using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Common; +// ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Benchmarks.General { @@ -15,7 +16,7 @@ 
namespace SixLabors.ImageSharp.Benchmarks.General /// - Divide each float pair, round the result /// - Iterate through all rounded values as int-s /// - public unsafe class RoundSinglePrecisionBlocks + public unsafe class Block8x8F_DivideRound { private const int ExecutionCount = 5; // Added this to reduce the effect of copying the blocks private static readonly Vector4 MinusOne = new Vector4(-1); diff --git a/tests/ImageSharp.Benchmarks/General/Block8x8F_Round.cs b/tests/ImageSharp.Benchmarks/General/Block8x8F_Round.cs new file mode 100644 index 0000000000..2739877a6e --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/Block8x8F_Round.cs @@ -0,0 +1,67 @@ +// ReSharper disable InconsistentNaming + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; + +using BenchmarkDotNet.Attributes; + +using SixLabors.ImageSharp; +using SixLabors.ImageSharp.Formats.Jpeg.Common; + +namespace SixLabors.ImageSharp.Benchmarks.General +{ + public class Block8x8F_Round + { + private Block8x8F block = default(Block8x8F); + + [GlobalSetup] + public void Setup() + { + if (Vector.Count != 8) + { + throw new NotSupportedException("Vector.Count != 8"); + } + + for (int i = 0; i < Block8x8F.Size; i++) + { + this.block[i] = i * 44.8f; + } + } + + [Benchmark(Baseline = true)] + public void ScalarRound() + { + ref float b = ref Unsafe.As(ref this.block); + + for (int i = 0; i < Block8x8F.Size; i++) + { + ref float v = ref Unsafe.Add(ref b, i); + v = MathF.Round(v); + } + } + + [Benchmark] + public void SimdRound() + { + ref Block8x8F b = ref this.block; + + ref Vector row0 = ref Unsafe.As>(ref b.V0L); + row0 = SimdUtils.FastRound(row0); + ref Vector row1 = ref Unsafe.As>(ref b.V1L); + row1 = SimdUtils.FastRound(row1); + ref Vector row2 = ref Unsafe.As>(ref b.V2L); + row2 = SimdUtils.FastRound(row2); + ref Vector row3 = ref Unsafe.As>(ref b.V3L); + row3 = SimdUtils.FastRound(row3); + ref Vector row4 = ref Unsafe.As>(ref b.V4L); + row4 = SimdUtils.FastRound(row4); + 
ref Vector row5 = ref Unsafe.As>(ref b.V5L); + row5 = SimdUtils.FastRound(row5); + ref Vector row6 = ref Unsafe.As>(ref b.V6L); + row6 = SimdUtils.FastRound(row6); + ref Vector row7 = ref Unsafe.As>(ref b.V7L); + row7 = SimdUtils.FastRound(row7); + } + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs new file mode 100644 index 0000000000..32a783f3ba --- /dev/null +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -0,0 +1,119 @@ +using System; +using System.Numerics; +using Xunit; +// ReSharper disable InconsistentNaming + +namespace SixLabors.ImageSharp.Tests.Common +{ + using Xunit.Abstractions; + + public class SimdUtilsTests + { + private ITestOutputHelper Output { get; } + + public SimdUtilsTests(ITestOutputHelper output) + { + this.Output = output; + } + + private static int R(float f) => (int)MathF.Round(f, MidpointRounding.AwayFromZero); + + private static int Re(float f) => (int)MathF.Round(f, MidpointRounding.ToEven); + + // TODO: Move this to a proper test class! 
+ [Theory] + [InlineData(0.32, 54.5, -3.5, -4.1)] + [InlineData(5.3, 536.4, 4.5, 8.1)] + public void PseudoRound(float x, float y, float z, float w) + { + var v = new Vector4(x, y, z, w); + + Vector4 actual = v.PseudoRound(); + + Assert.Equal( + R(v.X), + (int)actual.X + ); + Assert.Equal( + R(v.Y), + (int)actual.Y + ); + Assert.Equal( + R(v.Z), + (int)actual.Z + ); + Assert.Equal( + R(v.W), + (int)actual.W + ); + } + + private static Vector CreateExactTestVector1() + { + float[] data = new float[Vector.Count]; + + data[0] = 0.1f; + data[1] = 0.4f; + data[2] = 0.5f; + data[3] = 0.9f; + + for (int i = 4; i < Vector.Count; i++) + { + data[i] = data[i - 4] + 100f; + } + return new Vector(data); + } + + private static Vector CreateRandomTestVector(int seed, float scale) + { + float[] data = new float[Vector.Count]; + Random rnd = new Random(seed); + for (int i = 0; i < Vector.Count; i++) + { + float v = (float)rnd.NextDouble() - 0.5f; + v *= 2 * scale; + data[i] = v; + } + return new Vector(data); + } + + [Fact] + public void Round() + { + Vector v = CreateExactTestVector1(); + Vector r = v.FastRound(); + + this.Output.WriteLine(r.ToString()); + + AssertEvenRoundIsCorrect(r, v); + } + + [Theory] + [InlineData(1, 1f)] + [InlineData(1, 10f)] + [InlineData(1, 1000f)] + [InlineData(42, 1f)] + [InlineData(42, 10f)] + [InlineData(42, 1000f)] + public void Round_RandomValues(int seed, float scale) + { + Vector v = CreateRandomTestVector(seed, scale); + Vector r = v.FastRound(); + + this.Output.WriteLine(v.ToString()); + this.Output.WriteLine(r.ToString()); + + AssertEvenRoundIsCorrect(r, v); + } + + private static void AssertEvenRoundIsCorrect(Vector r, Vector v) + { + for (int i = 0; i < Vector.Count; i++) + { + int actual = (int)r[i]; + int expected = Re(v[i]); + Assert.Equal(expected, actual); + } + } + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 
84c66de396..52c38bee83 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -347,5 +347,28 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Assert.Equal(expectedShort, actualShort); } } + + [Theory] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + public void RoundInplace(int seed) + { + Block8x8F s = CreateRandomFloatBlock(-500, 500, seed); + + Block8x8F d = s; + d.RoundInplace(); + + this.Output.WriteLine(s.ToString()); + this.Output.WriteLine(d.ToString()); + + for (int i = 0; i < 64; i++) + { + float expected = MathF.Round(s[i]); + float actual = d[i]; + + Assert.Equal(expected, actual); + } + } } } \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index e9db65105a..50746f6835 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -33,36 +33,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg private ITestOutputHelper Output { get; } - private static int R(float f) => (int)MathF.Round(f, MidpointRounding.AwayFromZero); - - // TODO: Move this to a proper test class! - [Theory] - [InlineData(0.32, 54.5, -3.5, -4.1)] - [InlineData(5.3, 536.4, 4.5, 8.1)] - public void Vector4_PseudoRound(float x, float y, float z, float w) - { - var v = new Vector4(x, y, z, w); - - Vector4 actual = v.PseudoRound(); - - Assert.Equal( - R(v.X), - (int)actual.X - ); - Assert.Equal( - R(v.Y), - (int)actual.Y - ); - Assert.Equal( - R(v.Z), - (int)actual.Z - ); - Assert.Equal( - R(v.W), - (int)actual.W - ); - } - [Theory] [MemberData(nameof(CommonConversionData))] public void ConvertFromYCbCrBasic(int inputBufferLength, int resultBufferLength, int seed)