From 57ccde4521cc87cf058d5eefb1b0a2dff749e70d Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sun, 17 Sep 2017 07:30:54 +0200 Subject: [PATCH] NormalizeColorsAndRoundAvx2() + JpegBlockPostProcessor cleanup --- .../Formats/Jpeg/Common/Block8x8F.cs | 60 ++++---- .../Common/Decoder/JpegBlockPostProcessor.cs | 129 ++++-------------- .../Decoder/JpegColorConverter.FromYCbCr.cs | 4 +- .../Jpeg/Common/FastFloatingPointDCT.cs | 3 + .../Formats/Jpg/Block8x8FTests.cs | 26 +++- 5 files changed, 87 insertions(+), 135 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs index 4ffb63480..6ba69bec4 100644 --- a/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs @@ -529,34 +529,39 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common return result; } - public void RoundInplace() + public void NormalizeColorsAndRoundInplaceAvx2() + { + Vector off = new Vector(128f); + Vector max = new Vector(255F); + + ref Vector row0 = ref Unsafe.As>(ref this.V0L); + row0 = NormalizeAndRound(row0, off, max); + ref Vector row1 = ref Unsafe.As>(ref this.V1L); + row1 = NormalizeAndRound(row1, off, max); + ref Vector row2 = ref Unsafe.As>(ref this.V2L); + row2 = NormalizeAndRound(row2, off, max); + ref Vector row3 = ref Unsafe.As>(ref this.V3L); + row3 = NormalizeAndRound(row3, off, max); + ref Vector row4 = ref Unsafe.As>(ref this.V4L); + row4 = NormalizeAndRound(row4, off, max); + ref Vector row5 = ref Unsafe.As>(ref this.V5L); + row5 = NormalizeAndRound(row5, off, max); + ref Vector row6 = ref Unsafe.As>(ref this.V6L); + row6 = NormalizeAndRound(row6, off, max); + ref Vector row7 = ref Unsafe.As>(ref this.V7L); + row7 = NormalizeAndRound(row7, off, max); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector NormalizeAndRound(Vector row, Vector off, Vector max) { - if (Vector.Count == 8 && Vector.Count == 8) - { - ref Vector row0 = ref Unsafe.As>(ref this.V0L); - row0 = row0.FastRound(); - ref Vector row1 = ref Unsafe.As>(ref this.V1L); - row1 = row1.FastRound(); - ref Vector row2 = ref Unsafe.As>(ref this.V2L); - row2 = row2.FastRound(); - ref Vector row3 = ref Unsafe.As>(ref this.V3L); - row3 = row3.FastRound(); - ref Vector row4 = ref Unsafe.As>(ref this.V4L); - row4 = row4.FastRound(); - ref Vector row5 = ref Unsafe.As>(ref this.V5L); - row5 = row5.FastRound(); - ref Vector row6 = ref Unsafe.As>(ref this.V6L); - row6 = row6.FastRound(); - ref Vector row7 = ref Unsafe.As>(ref this.V7L); - row7 = row7.FastRound(); - } - else - { - this.RoundInplaceSlow(); - } + row += off; + row = Vector.Max(row, Vector.Zero); + row = Vector.Min(row, max); + return row.FastRound(); } - internal void RoundInplaceSlow() + public void RoundInplace() { for (int i = 0; i < Size; i++) { @@ -598,10 +603,5 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common DebugGuard.MustBeLessThan(idx, Size, nameof(idx)); DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx)); } - - [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(float))] - private struct Row - { - } } } diff --git a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs index 2db869de7..0459a5df0 100644 --- a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs @@ -14,14 +14,24 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder internal unsafe struct JpegBlockPostProcessor { /// - /// The + /// Source block /// - private ComputationData data; + public Block8x8F SourceBlock; /// - /// Pointers to elements of + /// Temporal block 1 to store intermediate and/or final computation results /// - private DataPointers pointers; + public Block8x8F WorkspaceBlock1; + + /// + /// Temporal block 2 to store intermediate and/or final computation results + /// + public Block8x8F WorkspaceBlock2; + + /// + /// The quantization table as + /// + public Block8x8F DequantiazationTable; private Size subSamplingDivisors; @@ -30,11 +40,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder /// public static void Init(JpegBlockPostProcessor* postProcessor, IRawJpegData decoder, IJpegComponent component) { - postProcessor->data = ComputationData.Create(); - postProcessor->pointers = new DataPointers(&postProcessor->data); - int qtIndex = component.QuantizationTableIndex; - postProcessor->data.DequantiazationTable = ZigZag.CreateDequantizationTable(ref decoder.QuantizationTables[qtIndex]); + postProcessor->DequantiazationTable = ZigZag.CreateDequantizationTable(ref decoder.QuantizationTables[qtIndex]); postProcessor->subSamplingDivisors = component.SubSamplingDivisors; } @@ -42,110 +49,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder ref Block8x8 sourceBlock, BufferArea destArea) { - sourceBlock.CopyToFloatBlock(ref this.data.SourceBlock); - - Block8x8F* b = this.pointers.SourceBlock; + ref Block8x8F b = ref this.SourceBlock; + sourceBlock.CopyToFloatBlock(ref b); // Dequantize: - b->MultiplyInplace(ref this.data.DequantiazationTable); + b.MultiplyInplace(ref this.DequantiazationTable); - FastFloatingPointDCT.TransformIDCT(ref *b, ref this.data.WorkspaceBlock1, ref this.data.WorkspaceBlock2); - - this.data.WorkspaceBlock1.NormalizeColorsInplace(); + FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2); // To conform better to libjpeg we actually NEED TO loose precision here. // This is because they store blocks as Int16 between all the operations. - // Unfortunately, we need to emulate this to be "more accurate" :( - this.data.WorkspaceBlock1.RoundInplace(); - - this.data.WorkspaceBlock1.CopyTo(destArea, this.subSamplingDivisors.Width, this.subSamplingDivisors.Height); - } - - /// - /// Holds the "large" data blocks needed for computations. - /// - [StructLayout(LayoutKind.Sequential)] - public struct ComputationData - { - /// - /// Source block - /// - public Block8x8F SourceBlock; - - /// - /// Temporal block 1 to store intermediate and/or final computation results - /// - public Block8x8F WorkspaceBlock1; - - /// - /// Temporal block 2 to store intermediate and/or final computation results - /// - public Block8x8F WorkspaceBlock2; - - /// - /// The quantization table as - /// - public Block8x8F DequantiazationTable; - - /// - /// The jpeg unzig data - /// - public ZigZag Unzig; - - /// - /// Creates and initializes a new instance - /// - /// The - public static ComputationData Create() + // To be "more accurate", we need to emulate this by rounding! + if (SimdUtils.IsAvx2CompatibleArchitecture) { - var data = default(ComputationData); - data.Unzig = ZigZag.CreateUnzigTable(); - return data; + this.WorkspaceBlock1.NormalizeColorsAndRoundInplaceAvx2(); } - } - - /// - /// Contains pointers to the memory regions of so they can be easily passed around to pointer based utility methods of - /// - public struct DataPointers - { - /// - /// Pointer to - /// - public Block8x8F* SourceBlock; - - /// - /// Pointer to - /// - public Block8x8F* WorkspaceBlock1; - - /// - /// Pointer to - /// - public Block8x8F* WorkspaceBlock2; - - /// - /// Pointer to - /// - public Block8x8F* DequantiazationTable; - - /// - /// Pointer to as int* - /// - public int* Unzig; - - /// - /// Initializes a new instance of the struct. - /// - /// Pointer to - internal DataPointers(ComputationData* dataPtr) + else { - this.SourceBlock = &dataPtr->SourceBlock; - this.WorkspaceBlock1 = &dataPtr->WorkspaceBlock1; - this.WorkspaceBlock2 = &dataPtr->WorkspaceBlock2; - this.DequantiazationTable = &dataPtr->DequantiazationTable; - this.Unzig = dataPtr->Unzig.Data; + this.WorkspaceBlock1.NormalizeColorsInplace(); + this.WorkspaceBlock1.RoundInplace(); } + + this.WorkspaceBlock1.CopyTo(destArea, this.subSamplingDivisors.Width, this.subSamplingDivisors.Height); } } } \ No newline at end of file diff --git a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs index 693afc6f2..fef6d6438 100644 --- a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs +++ b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs @@ -123,12 +123,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder if (Vector.Count == 4) { - // TODO: Find a way to properly run & test this path on modern AVX2 PC-s! (Have I already mentioned that Vector is terrible?) + // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector is terrible?) r.RoundAndDownscaleBasic(); g.RoundAndDownscaleBasic(); b.RoundAndDownscaleBasic(); } - else if (Vector.Count == 8) + else if (SimdUtils.IsAvx2CompatibleArchitecture) { r.RoundAndDownscaleAvx2(); g.RoundAndDownscaleAvx2(); diff --git a/src/ImageSharp/Formats/Jpeg/Common/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Common/FastFloatingPointDCT.cs index bdea14793..3ee6e72c5 100644 --- a/src/ImageSharp/Formats/Jpeg/Common/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Common/FastFloatingPointDCT.cs @@ -50,7 +50,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common /// Temporary block provided by the caller public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp) { + // TODO: Transpose is a bottleneck now. We need full AVX support to optimize it: + // https://github.com/dotnet/corefx/issues/22940 src.TransposeInto(ref temp); + IDCT8x4_LeftPart(ref temp, ref dest); IDCT8x4_RightPart(ref temp, ref dest); diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 66c9bb3e1..1398b2f89 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -297,6 +297,30 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } } + [Theory] + [InlineData(1)] + [InlineData(2)] + public void NormalizeColorsAndRoundAvx2(int seed) + { + if (!SimdUtils.IsAvx2CompatibleArchitecture) + { + this.Output.WriteLine("AVX2 not supported, skipping!"); + return; + } + + Block8x8F source = CreateRandomFloatBlock(-200, 200, seed); + + Block8x8F expected = source; + expected.NormalizeColorsInplace(); + expected.RoundInplace(); + + Block8x8F actual = source; + actual.NormalizeColorsAndRoundInplaceAvx2(); + + this.Output.WriteLine(expected.ToString()); + this.Output.WriteLine(actual.ToString()); + this.CompareBlocks(expected, actual, 0); + } [Theory] [InlineData(1)] @@ -352,7 +376,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [InlineData(1)] [InlineData(2)] [InlineData(3)] - public void RoundInplace(int seed) + public void RoundInplaceSlow(int seed) { Block8x8F s = CreateRandomFloatBlock(-500, 500, seed);