From 6baff43cb73d08ca0a752178cebf47becbbc342c Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Tue, 17 Jan 2017 05:02:11 +0100 Subject: [PATCH] Optimized UnzigDivRound --- .../Components/Block8x8F.cs | 60 +++++++++++++++++- .../JpegEncoderCore.cs | 31 +++------- .../General/RoundSinglePrecisionBlocks.cs | 1 - .../General/Vector4Constants.cs | 61 +++++++++++++++++++ .../Formats/Jpg/Block8x8FTests.cs | 31 ++++++++++ .../Formats/Jpg/ReferenceImplementations.cs | 26 ++++++++ 6 files changed, 186 insertions(+), 24 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/General/Vector4Constants.cs diff --git a/src/ImageSharp.Formats.Jpeg/Components/Block8x8F.cs b/src/ImageSharp.Formats.Jpeg/Components/Block8x8F.cs index e21ba2d02..ec0feaf45 100644 --- a/src/ImageSharp.Formats.Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp.Formats.Jpeg/Components/Block8x8F.cs @@ -327,10 +327,11 @@ namespace ImageSharp.Formats.Jpg } /// - /// Unzig the elements of src into dest, while dividing them by elements of qt and rounding the values + /// Unzig the elements of src into dest, while dividing them by elements of qt and rounding the values. + /// Store the result in the memory area pointed to by dest. /// /// Source block - /// Destination block + /// Destination block of integers /// Quantization table /// Pointer to elements // [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -349,6 +350,31 @@ namespace ImageSharp.Formats.Jpg } } + /// + /// Unzig the elements of block into dest, while dividing them by elements of qt and "pre-rounding" the values. + /// To finish the rounding it's enough to (int)-cast these values. 
+ /// + /// Source block + /// Destination block + /// The quantization table + /// Pointer to elements of + public static unsafe void UnzigDivRound( + Block8x8F* block, + Block8x8F* dest, + Block8x8F* qt, + int* unzigPtr) + { + float* s = (float*)block; + float* d = (float*)dest; + + for (int zig = 0; zig < ScalarCount; zig++) + { + d[zig] = s[unzigPtr[zig]]; + } + + DivideRoundAll(ref *dest, ref *qt); + } + /// /// Scales the 16x16 region represented by the 4 source blocks to the 8x8 DST block. /// @@ -391,5 +417,35 @@ namespace ImageSharp.Formats.Jpg return -((-dividend + (divisor >> 1)) / divisor); } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) + { + a.V0L = DivideRound(a.V0L, b.V0L); + a.V0R = DivideRound(a.V0R, b.V0R); + a.V1L = DivideRound(a.V1L, b.V1L); + a.V1R = DivideRound(a.V1R, b.V1R); + a.V2L = DivideRound(a.V2L, b.V2L); + a.V2R = DivideRound(a.V2R, b.V2R); + a.V3L = DivideRound(a.V3L, b.V3L); + a.V3R = DivideRound(a.V3R, b.V3R); + a.V4L = DivideRound(a.V4L, b.V4L); + a.V4R = DivideRound(a.V4R, b.V4R); + a.V5L = DivideRound(a.V5L, b.V5L); + a.V5R = DivideRound(a.V5R, b.V5R); + a.V6L = DivideRound(a.V6L, b.V6L); + a.V6R = DivideRound(a.V6R, b.V6R); + a.V7L = DivideRound(a.V7L, b.V7L); + a.V7R = DivideRound(a.V7R, b.V7R); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor) + { + Vector4 sign = Vector4.Min(dividend, Vector4.One); + sign = Vector4.Max(sign, new Vector4(-1)); + + return (dividend / divisor) + (sign * new Vector4(0.5f)); + } } } \ No newline at end of file diff --git a/src/ImageSharp.Formats.Jpeg/JpegEncoderCore.cs b/src/ImageSharp.Formats.Jpeg/JpegEncoderCore.cs index aa7f2495d..b0e442eaa 100644 --- a/src/ImageSharp.Formats.Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp.Formats.Jpeg/JpegEncoderCore.cs @@ -452,8 +452,6 @@ namespace ImageSharp.Formats // ReSharper disable once 
InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; - int* unzigDest = stackalloc int[Block8x8F.ScalarCount]; - using (PixelArea rgbBytes = new PixelArea(8, 8, ComponentOrder.Xyz)) { for (int y = 0; y < pixels.Height; y += 8) @@ -467,7 +465,6 @@ namespace ImageSharp.Formats prevDCY, &b, &temp1, - unzigDest, &temp2, &onStackLuminanceQuantTable, unzig.Data); @@ -476,7 +473,6 @@ namespace ImageSharp.Formats prevDCCb, &cb, &temp1, - unzigDest, &temp2, &onStackChrominanceQuantTable, unzig.Data); @@ -485,7 +481,6 @@ namespace ImageSharp.Formats prevDCCr, &cr, &temp1, - unzigDest, &temp2, &onStackChrominanceQuantTable, unzig.Data); @@ -542,9 +537,8 @@ namespace ImageSharp.Formats /// The quantization table index. /// The previous DC value. /// Source block - /// Temporal block to be used as FDCT Destination - /// Working buffer for unzigged stuff - /// Temporal block 2 + /// Temporal block to be used as FDCT Destination + /// Temporal block 2 /// Quantization table /// The 8x8 Unzig block pointer /// @@ -554,19 +548,19 @@ namespace ImageSharp.Formats QuantIndex index, int prevDC, Block8x8F* src, - Block8x8F* tempDest, - int* d, - Block8x8F* tempWorker, + Block8x8F* tempDest1, + Block8x8F* tempDest2, Block8x8F* quant, int* unzigPtr) { - DCT.TransformFDCT(ref *src, ref *tempDest, ref *tempWorker); + DCT.TransformFDCT(ref *src, ref *tempDest1, ref *tempDest2); - Block8x8F.UnZigDivRound(tempDest, d, quant, unzigPtr); + Block8x8F.UnzigDivRound(tempDest1, tempDest2, quant, unzigPtr); + float* unziggedDestPtr = (float*)tempDest2; - // Emit the DC delta. - int dc = d[0]; + int dc = (int)unziggedDestPtr[0]; + // Emit the DC delta. this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC); // Emit the AC components. 
@@ -575,7 +569,7 @@ namespace ImageSharp.Formats for (int zig = 1; zig < Block8x8F.ScalarCount; zig++) { - int ac = d[zig]; + int ac = (int)unziggedDestPtr[zig]; if (ac == 0) { @@ -823,8 +817,6 @@ namespace ImageSharp.Formats UnzigData unzig = UnzigData.Create(); - int* unzigDest = stackalloc int[Block8x8F.ScalarCount]; - // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; @@ -846,7 +838,6 @@ namespace ImageSharp.Formats prevDCY, &b, &temp1, - unzigDest, &temp2, &onStackLuminanceQuantTable, unzig.Data); @@ -858,7 +849,6 @@ namespace ImageSharp.Formats prevDCCb, &b, &temp1, - unzigDest, &temp2, &onStackChrominanceQuantTable, unzig.Data); @@ -869,7 +859,6 @@ namespace ImageSharp.Formats prevDCCr, &b, &temp1, - unzigDest, &temp2, &onStackChrominanceQuantTable, unzig.Data); diff --git a/tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs b/tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs index 8c104dff0..52880168d 100644 --- a/tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs +++ b/tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs @@ -96,7 +96,6 @@ Block8x8F bDividend = this.inputDividend; Block8x8F bDivisor = this.inputDivisior; float* pDividend = (float*)&bDividend; - float* pDivisor = (float*)&bDivisor; for (int cnt = 0; cnt < ExecutionCount; cnt++) { diff --git a/tests/ImageSharp.Benchmarks/General/Vector4Constants.cs b/tests/ImageSharp.Benchmarks/General/Vector4Constants.cs new file mode 100644 index 000000000..a7afa336e --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/Vector4Constants.cs @@ -0,0 +1,61 @@ +namespace ImageSharp.Benchmarks.General +{ + using System; + using System.Numerics; + + using BenchmarkDotNet.Attributes; + + /// + /// Has it any effect on performance to store SIMD constants as static readonly fields? Is it OK to always inline them? + /// Spoiler: the difference seems to be statistically insignificant! 
+ /// + public class Vector4Constants + { + private static readonly Vector4 A = new Vector4(1.2f); + private static readonly Vector4 B = new Vector4(3.4f); + private static readonly Vector4 C = new Vector4(5.6f); + private static readonly Vector4 D = new Vector4(7.8f); + + private Random random = null; + + private Vector4 parameter; + + [Setup] + public void Setup() + { + this.random = new Random(42); + this.parameter = new Vector4( + this.GetRandomFloat(), + this.GetRandomFloat(), + this.GetRandomFloat(), + this.GetRandomFloat() + ); + } + + [Benchmark(Baseline = true)] + public Vector4 Static() + { + Vector4 p = this.parameter; + + Vector4 x = p * A / B + p * C / D; + Vector4 y = p / A * B + p / C * D; + Vector4 z = Vector4.Min(p, A); + Vector4 w = Vector4.Max(p, B); + return x + y + z + w; + } + + [Benchmark] + public Vector4 Inlined() + { + Vector4 p = this.parameter; + + Vector4 x = p * new Vector4(1.2f) / new Vector4(2.3f) + p * new Vector4(4.5f) / new Vector4(6.7f); + Vector4 y = p / new Vector4(1.2f) * new Vector4(2.3f) + p / new Vector4(4.5f) * new Vector4(6.7f); + Vector4 z = Vector4.Min(p, new Vector4(1.2f)); + Vector4 w = Vector4.Max(p, new Vector4(2.3f)); + return x + y + z + w; + } + + private float GetRandomFloat() => (float)this.random.NextDouble(); + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 1f055bab4..690a8b620 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -12,6 +12,8 @@ namespace ImageSharp.Tests { using System.Diagnostics; using System.Numerics; + + using ImageSharp.Formats; using ImageSharp.Formats.Jpg; using Xunit; @@ -431,5 +433,34 @@ namespace ImageSharp.Tests Assert.Equal(actualDest.Data, expectedDest.Data, new ApproximateFloatComparer(1f)); } + + [Theory] + [InlineData(1)] + [InlineData(2)] + public unsafe void UnzigDivRound(int seed) + { + 
Block8x8F block = new Block8x8F(); + block.LoadFrom(Create8x8RandomFloatData(-2000, 2000, seed)); + + Block8x8F qt = new Block8x8F(); + qt.LoadFrom(Create8x8RandomFloatData(-2000, 2000, seed)); + + UnzigData unzig = UnzigData.Create(); + + int* expectedResults = stackalloc int[Block8x8F.ScalarCount]; + ReferenceImplementations.UnZigDivRoundRational(&block, expectedResults, &qt, unzig.Data); + + Block8x8F actualResults = default(Block8x8F); + + Block8x8F.UnzigDivRound(&block, &actualResults, &qt, unzig.Data); + + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + int expected = expectedResults[i]; + int actual = (int)actualResults[i]; + + Assert.Equal(expected, actual); + } + } } } \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs index 60c136674..06882719c 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs @@ -875,5 +875,31 @@ namespace ImageSharp.Tests } } } + + public static unsafe void UnZigDivRoundRational(Block8x8F* src, int* dest, Block8x8F* qt, int* unzigPtr) + { + float* s = (float*)src; + float* q = (float*)qt; + + for (int zig = 0; zig < Block8x8F.ScalarCount; zig++) + { + int a = (int)s[unzigPtr[zig]]; + int b = (int)q[zig]; + + int val = RationalRound(a, b); + dest[zig] = val; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int RationalRound(int dividend, int divisor) + { + if (dividend >= 0) + { + return (dividend + (divisor >> 1)) / divisor; + } + + return -((-dividend + (divisor >> 1)) / divisor); + } } } \ No newline at end of file