From cc99da35bf20804ae57000e15bb75b4c330a8679 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 29 Aug 2021 05:35:58 +0300 Subject: [PATCH] Added DCT in place --- .../Decoder/JpegBlockPostProcessor.cs | 24 ++++------ .../Components/Encoder/HuffmanScanEncoder.cs | 22 +++++----- .../Jpeg/Components/FastFloatingPointDCT.cs | 44 +++++++++++++++---- .../ImageSharp.Tests/Formats/Jpg/DCTTests.cs | 2 +- 4 files changed, 57 insertions(+), 35 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs index 00169d082..cf5fdd2df 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs @@ -19,14 +19,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder public Block8x8F SourceBlock; /// - /// Temporal block 1 to store intermediate and/or final computation results. + /// Temporal block to store intermediate computation results. /// - public Block8x8F WorkspaceBlock1; - - /// - /// Temporal block 2 to store intermediate and/or final computation results. - /// - public Block8x8F WorkspaceBlock2; + public Block8x8F WorkspaceBlock; /// /// The quantization table as . @@ -50,8 +45,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder this.subSamplingDivisors = component.SubSamplingDivisors; this.SourceBlock = default; - this.WorkspaceBlock1 = default; - this.WorkspaceBlock2 = default; + this.WorkspaceBlock = default; } /// @@ -71,20 +65,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder int destAreaStride, float maximumValue) { - ref Block8x8F b = ref this.SourceBlock; - b.LoadFrom(ref sourceBlock); + ref Block8x8F block = ref this.SourceBlock; + block.LoadFrom(ref sourceBlock); // Dequantize: - b.MultiplyInPlace(ref this.DequantiazationTable); + block.MultiplyInPlace(ref this.DequantiazationTable); - FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2); + FastFloatingPointDCT.TransformInplaceIDCT(ref block, ref this.WorkspaceBlock); // To conform better to libjpeg we actually NEED TO loose precision here. // This is because they store blocks as Int16 between all the operations. // To be "more accurate", we need to emulate this by rounding! - this.WorkspaceBlock1.NormalizeColorsAndRoundInPlace(maximumValue); + block.NormalizeColorsAndRoundInPlace(maximumValue); - this.WorkspaceBlock1.ScaledCopyTo( + block.ScaledCopyTo( ref destAreaOrigin, destAreaStride, this.subSamplingDivisors.Width, diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 8b61b66c9..4f5ffb3f8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -94,8 +94,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private int bitCount; - private Block8x8F temporalBlock1; - private Block8x8F temporalBlock2; + private Block8x8F temporalBlock; private Block8x8 temporalShortBlock; /// @@ -299,23 +298,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// The quantization table index. /// The previous DC value. - /// Source block - /// Quantization table - /// The 8x8 Unzig block. + /// Source block. + /// Quantization table. /// The . private int WriteBlock( QuantIndex index, int prevDC, - ref Block8x8F src, + ref Block8x8F block, ref Block8x8F quant) { - ref Block8x8F refTemp1 = ref this.temporalBlock1; - ref Block8x8F refTemp2 = ref this.temporalBlock2; + ref Block8x8F refTemp = ref this.temporalBlock; ref Block8x8 spectralBlock = ref this.temporalShortBlock; - FastFloatingPointDCT.TransformFDCT(ref src, ref refTemp1, ref refTemp2); + // Shifting level from 0..255 to -128..127 + block.AddInPlace(-128f); - Block8x8F.Quantize(ref refTemp1, ref spectralBlock, ref quant); + // Discrete cosine transform + FastFloatingPointDCT.TransformInplaceFDCT(ref block, ref refTemp); + + // Quantization + Block8x8F.Quantize(ref block, ref spectralBlock, ref quant); // Emit the DC delta. int dc = spectralBlock[0]; diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index 0f569b5da..dd46a83e3 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -276,28 +276,36 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// Source /// Destination /// Temporary block provided by the caller for optimization - /// If true, a constant -128.0 offset is applied for all values before FDCT public static void TransformFDCT( ref Block8x8F src, ref Block8x8F dest, - ref Block8x8F temp, - bool offsetSourceByNeg128 = true) + ref Block8x8F temp) { src.TransposeInto(ref temp); - if (offsetSourceByNeg128) - { - temp.AddInPlace(-128F); - } - FDCT8x8(ref temp, ref dest); dest.TransposeInto(ref temp); - FDCT8x8(ref temp, ref dest); dest.MultiplyInPlace(C_0_125); } + /// + /// Apply floating point FDCT inplace. + /// + /// Input matrix. + /// Matrix to store temporal results. + public static void TransformInplaceFDCT(ref Block8x8F matrix, ref Block8x8F temp) + { + matrix.TransposeInto(ref temp); + FDCT8x8(ref temp, ref matrix); + + matrix.TransposeInto(ref temp); + FDCT8x8(ref temp, ref matrix); + + matrix.MultiplyInPlace(C_0_125); + } + /// /// Performs 8x8 matrix Inverse Discrete Cosine Transform /// @@ -510,5 +518,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components // TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing? dest.MultiplyInPlace(C_0_125); } + + /// + /// Apply floating point IDCT inplace. + /// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239. + /// + /// Input matrix. + /// Matrix to store temporal results. + public static void TransformInplaceIDCT(ref Block8x8F block, ref Block8x8F temp) + { + block.TransposeInto(ref temp); + + IDCT8x8(ref temp, ref block); + block.TransposeInto(ref temp); + IDCT8x8(ref temp, ref block); + + // TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing? + block.MultiplyInPlace(C_0_125); + } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs index d49a6498c..34ca7f9eb 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs @@ -310,7 +310,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true); // testee - FastFloatingPointDCT.TransformFDCT(ref srcBlock, ref destBlock, ref temp2, false); + FastFloatingPointDCT.TransformFDCT(ref srcBlock, ref destBlock, ref temp2); var actualDest = new float[64]; destBlock.ScaledCopyTo(actualDest);