Browse Source

Merge branch 'SixLabors:master' into master

pull/1851/head
Ynse Hoornenborg 5 years ago
committed by GitHub
parent
commit
a13eae6686
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      shared-infrastructure
  2. 58
      src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs
  3. 11
      src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs
  4. 8
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
  5. 19
      src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs
  6. 28
      src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs
  7. 237
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs
  8. 532
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
  9. 29
      src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs
  10. 3
      src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs
  11. 2
      src/ImageSharp/Formats/Webp/EntropyIx.cs
  12. 2
      src/ImageSharp/Formats/Webp/HistoIx.cs
  13. 49
      src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
  14. 74
      src/ImageSharp/Formats/Webp/Lossless/CostManager.cs
  15. 10
      src/ImageSharp/Formats/Webp/Lossless/HTreeGroup.cs
  16. 2
      src/ImageSharp/Formats/Webp/Lossless/HuffmanCode.cs
  17. 4
      src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
  18. 51
      src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
  19. 2
      src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
  20. 2
      src/ImageSharp/Formats/Webp/Lossless/PixOrCopyMode.cs
  21. 2
      src/ImageSharp/Formats/Webp/Lossless/Vp8LBackwardRefs.cs
  22. 26
      src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
  23. 40
      src/ImageSharp/Formats/Webp/Lossless/Vp8LHashChain.cs
  24. 27
      src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs
  25. 3
      src/ImageSharp/Formats/Webp/WebpLookupTables.cs
  26. 2
      src/ImageSharp/Processing/Extensions/Normalization/HistogramEqualizationExtensions.cs
  27. 5
      src/ImageSharp/Processing/Processors/Normalization/HistogramEqualizationOptions.cs
  28. 4
      src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel}.cs
  29. 0
      tests/ImageSharp.Benchmarks/Codecs/Bmp/DecodeBmp.cs
  30. 0
      tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmp.cs
  31. 0
      tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmpMultiple.cs
  32. 0
      tests/ImageSharp.Benchmarks/Codecs/Gif/DecodeGif.cs
  33. 0
      tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGif.cs
  34. 0
      tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGifMultiple.cs
  35. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/CmykColorConversion.cs
  36. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/ColorConversionBenchmark.cs
  37. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/GrayscaleColorConversion.cs
  38. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/RgbColorConversion.cs
  39. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrColorConversion.cs
  40. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrForwardConverterBenchmark.cs
  41. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YccKColorConverter.cs
  42. 82
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg.cs
  43. 0
      tests/ImageSharp.Benchmarks/Codecs/Png/DecodeFilteredPng.cs
  44. 0
      tests/ImageSharp.Benchmarks/Codecs/Png/DecodePng.cs
  45. 0
      tests/ImageSharp.Benchmarks/Codecs/Png/EncodeIndexedPng.cs
  46. 0
      tests/ImageSharp.Benchmarks/Codecs/Png/EncodePng.cs
  47. 0
      tests/ImageSharp.Benchmarks/Codecs/Tga/DecodeTga.cs
  48. 0
      tests/ImageSharp.Benchmarks/Codecs/Tga/EncodeTga.cs
  49. 0
      tests/ImageSharp.Benchmarks/Codecs/Tiff/DecodeTiff.cs
  50. 0
      tests/ImageSharp.Benchmarks/Codecs/Tiff/EncodeTiff.cs
  51. 0
      tests/ImageSharp.Benchmarks/Codecs/Webp/DecodeWebp.cs
  52. 0
      tests/ImageSharp.Benchmarks/Codecs/Webp/EncodeWebp.cs
  53. 5
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
  54. 26
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs
  55. 209
      tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
  56. 2
      tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Images.cs
  57. 2
      tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Metadata.cs
  58. 2
      tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs
  59. 15
      tests/ImageSharp.Tests/Formats/Jpg/Utils/LibJpegTools.ComponentData.cs
  60. 17
      tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs
  61. 14
      tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs
  62. 2
      tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs
  63. 4
      tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs
  64. 2
      tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs
  65. 2
      tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs
  66. 3
      tests/ImageSharp.Tests/TestImages.cs
  67. 3
      tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg422.png
  68. 3
      tests/Images/Input/Jpg/baseline/winter444_interleaved.jpg
  69. 0
      tests/Images/Input/Jpg/progressive/winter420_noninterleaved.jpg

2
shared-infrastructure

@ -1 +1 @@
Subproject commit 33cb12ca77f919b44de56f344d2627cc2a108c3a Subproject commit a042aba176cdb840d800c6ed4cfe41a54fb7b1e3

58
src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs

@ -337,6 +337,64 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
} }
} }
/// <summary>
/// Transposes this block in place by exchanging every element above the
/// main diagonal with its mirror element below the diagonal.
/// </summary>
/// <remarks>
/// The block is addressed as a flat run of <see cref="short"/> values with
/// 8 elements per row, so element (row, col) lives at offset row * 8 + col.
/// The 28 exchanges are written out explicitly instead of being looped.
/// </remarks>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInplace()
{
    ref short origin = ref Unsafe.As<Block8x8, short>(ref this);

    // row #0 <-> column #0
    Exchange(ref origin, 1, 8);
    Exchange(ref origin, 2, 16);
    Exchange(ref origin, 3, 24);
    Exchange(ref origin, 4, 32);
    Exchange(ref origin, 5, 40);
    Exchange(ref origin, 6, 48);
    Exchange(ref origin, 7, 56);

    // row #1 <-> column #1
    Exchange(ref origin, 10, 17);
    Exchange(ref origin, 11, 25);
    Exchange(ref origin, 12, 33);
    Exchange(ref origin, 13, 41);
    Exchange(ref origin, 14, 49);
    Exchange(ref origin, 15, 57);

    // row #2 <-> column #2
    Exchange(ref origin, 19, 26);
    Exchange(ref origin, 20, 34);
    Exchange(ref origin, 21, 42);
    Exchange(ref origin, 22, 50);
    Exchange(ref origin, 23, 58);

    // row #3 <-> column #3
    Exchange(ref origin, 28, 35);
    Exchange(ref origin, 29, 43);
    Exchange(ref origin, 30, 51);
    Exchange(ref origin, 31, 59);

    // row #4 <-> column #4
    Exchange(ref origin, 37, 44);
    Exchange(ref origin, 38, 52);
    Exchange(ref origin, 39, 60);

    // row #5 <-> column #5
    Exchange(ref origin, 46, 53);
    Exchange(ref origin, 47, 61);

    // row #6 <-> column #6
    Exchange(ref origin, 55, 62);

    // Exchanges the two elements located 'upper' and 'lower' shorts past 'start'.
    static void Exchange(ref short start, int upper, int lower)
    {
        ref short above = ref Unsafe.Add(ref start, upper);
        ref short below = ref Unsafe.Add(ref start, lower);
        (above, below) = (below, above);
    }
}
/// <summary> /// <summary>
/// Calculate the total sum of absolute differences of elements in 'a' and 'b'. /// Calculate the total sum of absolute differences of elements in 'a' and 'b'.
/// </summary> /// </summary>

11
src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs

@ -151,6 +151,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if (this.componentsCount == this.frame.ComponentCount) if (this.componentsCount == this.frame.ComponentCount)
{ {
this.ParseBaselineDataInterleaved(); this.ParseBaselineDataInterleaved();
this.spectralConverter.CommitConversion();
} }
else else
{ {
@ -501,7 +502,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{ {
i += r; i += r;
s = buffer.Receive(s); s = buffer.Receive(s);
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i++]) = (short)s; Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[i++]) = (short)s;
} }
else else
{ {
@ -570,7 +571,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if (s != 0) if (s != 0)
{ {
s = buffer.Receive(s); s = buffer.Receive(s);
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i]) = (short)(s << low); Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[i]) = (short)(s << low);
} }
else else
{ {
@ -646,7 +647,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
do do
{ {
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]); ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[k]);
if (coef != 0) if (coef != 0)
{ {
buffer.CheckBits(); buffer.CheckBits();
@ -672,7 +673,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if ((s != 0) && (k < 64)) if ((s != 0) && (k < 64))
{ {
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]) = (short)s; Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[k]) = (short)s;
} }
} }
} }
@ -681,7 +682,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{ {
for (; k <= end; k++) for (; k <= end; k++)
{ {
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]); ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[k]);
if (coef != 0) if (coef != 0)
{ {

8
src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs

@ -18,11 +18,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// </summary> /// </summary>
public Block8x8F SourceBlock; public Block8x8F SourceBlock;
/// <summary>
/// Temporal block to store intermediate computation results.
/// </summary>
public Block8x8F WorkspaceBlock;
/// <summary> /// <summary>
/// The quantization table as <see cref="Block8x8F"/>. /// The quantization table as <see cref="Block8x8F"/>.
/// </summary> /// </summary>
@ -45,7 +40,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
this.subSamplingDivisors = component.SubSamplingDivisors; this.subSamplingDivisors = component.SubSamplingDivisors;
this.SourceBlock = default; this.SourceBlock = default;
this.WorkspaceBlock = default;
} }
/// <summary> /// <summary>
@ -71,7 +65,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
// Dequantize: // Dequantize:
block.MultiplyInPlace(ref this.DequantiazationTable); block.MultiplyInPlace(ref this.DequantiazationTable);
FastFloatingPointDCT.TransformIDCT(ref block, ref this.WorkspaceBlock); FastFloatingPointDCT.TransformIDCT(ref block);
// To conform better to libjpeg we actually NEED TO lose precision here. // To conform better to libjpeg we actually NEED TO lose precision here.
// This is because they store blocks as Int16 between all the operations. // This is because they store blocks as Int16 between all the operations.

19
src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs

@ -13,6 +13,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// </remarks> /// </remarks>
internal abstract class SpectralConverter internal abstract class SpectralConverter
{ {
/// <summary>
/// Gets a value indicating whether this converter has converted spectral
/// data of the current image or not.
/// </summary>
/// <remarks>
/// The setter is private; within this class the flag is switched to
/// <see langword="true"/> by <see cref="CommitConversion"/>.
/// </remarks>
protected bool Converted { get; private set; }
/// <summary> /// <summary>
/// Injects jpeg image decoding metadata. /// Injects jpeg image decoding metadata.
/// </summary> /// </summary>
@ -33,6 +39,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// </remarks> /// </remarks>
public abstract void ConvertStrideBaseline(); public abstract void ConvertStrideBaseline();
/// <summary>
/// Marks the current converter state as 'converted' by setting
/// <see cref="Converted"/> to <see langword="true"/>.
/// </summary>
/// <remarks>
/// This must be called only for baseline interleaved JPEGs, and at most once
/// per converter: the guard below expects <see cref="Converted"/> to still be
/// <see langword="false"/> when this method runs.
/// </remarks>
public void CommitConversion()
{
// Reject a repeated commit. NOTE(review): DebugGuard is presumably active in debug builds only - confirm.
DebugGuard.IsFalse(this.Converted, nameof(this.Converted), $"{nameof(this.CommitConversion)} must be called only once");
this.Converted = true;
}
/// <summary> /// <summary>
/// Gets the color converter. /// Gets the color converter.
/// </summary> /// </summary>

28
src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs

@ -3,6 +3,7 @@
using System; using System;
using System.Buffers; using System.Buffers;
using System.Linq;
using System.Numerics; using System.Numerics;
using System.Threading; using System.Threading;
using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters;
@ -29,8 +30,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
private Buffer2D<TPixel> pixelBuffer; private Buffer2D<TPixel> pixelBuffer;
private int blockRowsPerStep;
private int pixelRowsPerStep; private int pixelRowsPerStep;
private int pixelRowCounter; private int pixelRowCounter;
@ -41,8 +40,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
this.cancellationToken = cancellationToken; this.cancellationToken = cancellationToken;
} }
private bool Converted => this.pixelRowCounter >= this.pixelBuffer.Height;
public Buffer2D<TPixel> GetPixelBuffer() public Buffer2D<TPixel> GetPixelBuffer()
{ {
if (!this.Converted) if (!this.Converted)
@ -52,7 +49,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
for (int step = 0; step < steps; step++) for (int step = 0; step < steps; step++)
{ {
this.cancellationToken.ThrowIfCancellationRequested(); this.cancellationToken.ThrowIfCancellationRequested();
this.ConvertNextStride(step); this.ConvertStride(step);
} }
} }
@ -65,18 +62,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
MemoryAllocator allocator = this.configuration.MemoryAllocator; MemoryAllocator allocator = this.configuration.MemoryAllocator;
// iteration data // iteration data
IJpegComponent c0 = frame.Components[0]; int majorBlockWidth = frame.Components.Max((component) => component.SizeInBlocks.Width);
int majorVerticalSamplingFactor = frame.Components.Max((component) => component.SamplingFactors.Height);
const int blockPixelHeight = 8; const int blockPixelHeight = 8;
this.blockRowsPerStep = c0.SamplingFactors.Height; this.pixelRowsPerStep = majorVerticalSamplingFactor * blockPixelHeight;
this.pixelRowsPerStep = this.blockRowsPerStep * blockPixelHeight;
// pixel buffer for resulting image // pixel buffer for resulting image
this.pixelBuffer = allocator.Allocate2D<TPixel>(frame.PixelWidth, frame.PixelHeight); this.pixelBuffer = allocator.Allocate2D<TPixel>(frame.PixelWidth, frame.PixelHeight);
this.paddedProxyPixelRow = allocator.Allocate<TPixel>(frame.PixelWidth + 3); this.paddedProxyPixelRow = allocator.Allocate<TPixel>(frame.PixelWidth + 3);
// component processors from spectral to Rgba32 // component processors from spectral to Rgba32
var postProcessorBufferSize = new Size(c0.SizeInBlocks.Width * 8, this.pixelRowsPerStep); const int blockPixelWidth = 8;
var postProcessorBufferSize = new Size(majorBlockWidth * blockPixelWidth, this.pixelRowsPerStep);
this.componentProcessors = new JpegComponentPostProcessor[frame.Components.Length]; this.componentProcessors = new JpegComponentPostProcessor[frame.Components.Length];
for (int i = 0; i < this.componentProcessors.Length; i++) for (int i = 0; i < this.componentProcessors.Length; i++)
{ {
@ -84,7 +82,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
} }
// single 'stride' rgba32 buffer for conversion between spectral and TPixel // single 'stride' rgba32 buffer for conversion between spectral and TPixel
// this.rgbaBuffer = allocator.Allocate<Vector4>(frame.PixelWidth);
this.rgbBuffer = allocator.Allocate<byte>(frame.PixelWidth * 3); this.rgbBuffer = allocator.Allocate<byte>(frame.PixelWidth * 3);
// color converter from Rgba32 to TPixel // color converter from Rgba32 to TPixel
@ -95,18 +92,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
public override void ConvertStrideBaseline() public override void ConvertStrideBaseline()
{ {
// Convert next pixel stride using single spectral `stride' // Convert next pixel stride using single spectral `stride'
// Note that zero passing eliminates the need of virtual call from JpegComponentPostProcessor // Note that zero passing eliminates the need of virtual call
this.ConvertNextStride(spectralStep: 0); // from JpegComponentPostProcessor
this.ConvertStride(spectralStep: 0);
// Clear spectral stride - this is VERY important as jpeg possibly won't fill entire buffer each stride
// Which leads to decoding artifacts
// Note that this code clears all buffers of the post processors, it's their responsibility to allocate only single stride
foreach (JpegComponentPostProcessor cpp in this.componentProcessors) foreach (JpegComponentPostProcessor cpp in this.componentProcessors)
{ {
cpp.ClearSpectralBuffers(); cpp.ClearSpectralBuffers();
} }
} }
/// <inheritdoc/>
public void Dispose() public void Dispose()
{ {
if (this.componentProcessors != null) if (this.componentProcessors != null)
@ -121,7 +117,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
this.paddedProxyPixelRow?.Dispose(); this.paddedProxyPixelRow?.Dispose();
} }
private void ConvertNextStride(int spectralStep) private void ConvertStride(int spectralStep)
{ {
int maxY = Math.Min(this.pixelBuffer.Height, this.pixelRowCounter + this.pixelRowsPerStep); int maxY = Math.Min(this.pixelBuffer.Height, this.pixelRowCounter + this.pixelRowsPerStep);

237
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs

@ -2,9 +2,6 @@
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics; using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86; using System.Runtime.Intrinsics.X86;
@ -12,149 +9,147 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{ {
internal static partial class FastFloatingPointDCT internal static partial class FastFloatingPointDCT
{ {
#pragma warning disable SA1310, SA1311, IDE1006 // naming rules violation warnings #pragma warning disable SA1310, SA1311, IDE1006 // naming rule violation warnings
private static readonly Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f); private static readonly Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
private static readonly Vector256<float> mm256_F_0_3826 = Vector256.Create(0.382683433f); private static readonly Vector256<float> mm256_F_0_3826 = Vector256.Create(0.382683433f);
private static readonly Vector256<float> mm256_F_0_5411 = Vector256.Create(0.541196100f); private static readonly Vector256<float> mm256_F_0_5411 = Vector256.Create(0.541196100f);
private static readonly Vector256<float> mm256_F_1_3065 = Vector256.Create(1.306562965f); private static readonly Vector256<float> mm256_F_1_3065 = Vector256.Create(1.306562965f);
private static readonly Vector256<float> mm256_F_1_1758 = Vector256.Create(1.175876f); private static readonly Vector256<float> mm256_F_1_4142 = Vector256.Create(1.414213562f);
private static readonly Vector256<float> mm256_F_n1_9615 = Vector256.Create(-1.961570560f); private static readonly Vector256<float> mm256_F_1_8477 = Vector256.Create(1.847759065f);
private static readonly Vector256<float> mm256_F_n0_3901 = Vector256.Create(-0.390180644f); private static readonly Vector256<float> mm256_F_n1_0823 = Vector256.Create(-1.082392200f);
private static readonly Vector256<float> mm256_F_n0_8999 = Vector256.Create(-0.899976223f); private static readonly Vector256<float> mm256_F_n2_6131 = Vector256.Create(-2.613125930f);
private static readonly Vector256<float> mm256_F_n2_5629 = Vector256.Create(-2.562915447f);
private static readonly Vector256<float> mm256_F_0_2986 = Vector256.Create(0.298631336f);
private static readonly Vector256<float> mm256_F_2_0531 = Vector256.Create(2.053119869f);
private static readonly Vector256<float> mm256_F_3_0727 = Vector256.Create(3.072711026f);
private static readonly Vector256<float> mm256_F_1_5013 = Vector256.Create(1.501321110f);
private static readonly Vector256<float> mm256_F_n1_8477 = Vector256.Create(-1.847759065f);
private static readonly Vector256<float> mm256_F_0_7653 = Vector256.Create(0.765366865f);
#pragma warning restore SA1310, SA1311, IDE1006 #pragma warning restore SA1310, SA1311, IDE1006
/// <summary> /// <summary>
/// Apply floating point FDCT inplace using simd operations. /// Apply floating point FDCT inplace using simd operations.
/// </summary> /// </summary>
/// <param name="block">Input matrix.</param> /// <param name="block">Input block.</param>
private static void ForwardTransform_Avx(ref Block8x8F block) private static void FDCT8x8_Avx(ref Block8x8F block)
{ {
DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation."); DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation.");
// First pass - process rows // First pass - process rows
block.TransposeInplace(); block.TransposeInplace();
FDCT8x8_Avx(ref block); FDCT8x8_1D_Avx(ref block);
// Second pass - process columns // Second pass - process columns
block.TransposeInplace(); block.TransposeInplace();
FDCT8x8_Avx(ref block); FDCT8x8_1D_Avx(ref block);
// Applies a 1D floating point FDCT in place across the eight vectors V0..V7 of the block.
// Each Vector256<float> carries 8 float lanes, so one call runs eight independent
// 1D transforms at once (presumably one per column of the block as currently laid out;
// the caller transposes between passes - confirm against Block8x8F's layout).
// The result is written back into the same V0..V7 fields.
// NOTE(review): per the adjustment-table remarks in FastFloatingPointDCT, the output is
// not a finished DCT - the final scaling step is fused into quantization.
static void FDCT8x8_1D_Avx(ref Block8x8F block)
{
// Stage 1: sums and differences of the mirrored vector pairs (0/7, 1/6, 2/5, 3/4).
// The sums feed the even outputs, the differences feed the odd outputs.
Vector256<float> tmp0 = Avx.Add(block.V0, block.V7);
Vector256<float> tmp7 = Avx.Subtract(block.V0, block.V7);
Vector256<float> tmp1 = Avx.Add(block.V1, block.V6);
Vector256<float> tmp6 = Avx.Subtract(block.V1, block.V6);
Vector256<float> tmp2 = Avx.Add(block.V2, block.V5);
Vector256<float> tmp5 = Avx.Subtract(block.V2, block.V5);
Vector256<float> tmp3 = Avx.Add(block.V3, block.V4);
Vector256<float> tmp4 = Avx.Subtract(block.V3, block.V4);
// Even part: produces V0, V4, V2 and V6 from the four sums.
Vector256<float> tmp10 = Avx.Add(tmp0, tmp3);
Vector256<float> tmp13 = Avx.Subtract(tmp0, tmp3);
Vector256<float> tmp11 = Avx.Add(tmp1, tmp2);
Vector256<float> tmp12 = Avx.Subtract(tmp1, tmp2);
block.V0 = Avx.Add(tmp10, tmp11);
block.V4 = Avx.Subtract(tmp10, tmp11);
// 0.7071 is approximately 1/sqrt(2); this is the only multiplication in the even part.
Vector256<float> z1 = Avx.Multiply(Avx.Add(tmp12, tmp13), mm256_F_0_7071);
block.V2 = Avx.Add(tmp13, z1);
block.V6 = Avx.Subtract(tmp13, z1);
// Odd part: produces V1, V3, V5 and V7 from the four differences.
// tmp10..tmp12 are reused here as scratch for sums of adjacent differences.
tmp10 = Avx.Add(tmp4, tmp5);
tmp11 = Avx.Add(tmp5, tmp6);
tmp12 = Avx.Add(tmp6, tmp7);
// z5 is a shared term that feeds both z2 and z4.
Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), mm256_F_0_3826);
// NOTE(review): MultiplyAdd(a, b, c) presumably evaluates a + (b * c) - confirm against SimdUtils.HwIntrinsics.
Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_0_5411, tmp10);
Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_1_3065, tmp12);
Vector256<float> z3 = Avx.Multiply(tmp11, mm256_F_0_7071);
Vector256<float> z11 = Avx.Add(tmp7, z3);
Vector256<float> z13 = Avx.Subtract(tmp7, z3);
// Final butterflies write the four odd outputs.
block.V5 = Avx.Add(z13, z2);
block.V3 = Avx.Subtract(z13, z2);
block.V1 = Avx.Add(z11, z4);
block.V7 = Avx.Subtract(z11, z4);
}
} }
/// <summary> /// <summary>
/// Apply 1D floating point FDCT inplace using AVX operations on 8x8 matrix. /// Apply floating point IDCT inplace using simd operations.
/// </summary> /// </summary>
/// <remarks> /// <param name="transposedBlock">Transposed input block.</param>
/// Requires Avx support. private static void IDCT8x8_Avx(ref Block8x8F transposedBlock)
/// </remarks>
/// <param name="block">Input matrix.</param>
public static void FDCT8x8_Avx(ref Block8x8F block)
{ {
DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation."); DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation.");
Vector256<float> tmp0 = Avx.Add(block.V0, block.V7); // First pass - process columns
Vector256<float> tmp7 = Avx.Subtract(block.V0, block.V7); IDCT8x8_1D_Avx(ref transposedBlock);
Vector256<float> tmp1 = Avx.Add(block.V1, block.V6);
Vector256<float> tmp6 = Avx.Subtract(block.V1, block.V6); // Second pass - process rows
Vector256<float> tmp2 = Avx.Add(block.V2, block.V5); transposedBlock.TransposeInplace();
Vector256<float> tmp5 = Avx.Subtract(block.V2, block.V5); IDCT8x8_1D_Avx(ref transposedBlock);
Vector256<float> tmp3 = Avx.Add(block.V3, block.V4);
Vector256<float> tmp4 = Avx.Subtract(block.V3, block.V4); // Applies 1D floating point IDCT inplace
static void IDCT8x8_1D_Avx(ref Block8x8F block)
// Even part {
Vector256<float> tmp10 = Avx.Add(tmp0, tmp3); // Even part
Vector256<float> tmp13 = Avx.Subtract(tmp0, tmp3); Vector256<float> tmp0 = block.V0;
Vector256<float> tmp11 = Avx.Add(tmp1, tmp2); Vector256<float> tmp1 = block.V2;
Vector256<float> tmp12 = Avx.Subtract(tmp1, tmp2); Vector256<float> tmp2 = block.V4;
Vector256<float> tmp3 = block.V6;
block.V0 = Avx.Add(tmp10, tmp11);
block.V4 = Avx.Subtract(tmp10, tmp11); Vector256<float> z5 = tmp0;
Vector256<float> tmp10 = Avx.Add(z5, tmp2);
Vector256<float> z1 = Avx.Multiply(Avx.Add(tmp12, tmp13), mm256_F_0_7071); Vector256<float> tmp11 = Avx.Subtract(z5, tmp2);
block.V2 = Avx.Add(tmp13, z1);
block.V6 = Avx.Subtract(tmp13, z1); Vector256<float> tmp13 = Avx.Add(tmp1, tmp3);
Vector256<float> tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142);
// Odd part
tmp10 = Avx.Add(tmp4, tmp5); tmp0 = Avx.Add(tmp10, tmp13);
tmp11 = Avx.Add(tmp5, tmp6); tmp3 = Avx.Subtract(tmp10, tmp13);
tmp12 = Avx.Add(tmp6, tmp7); tmp1 = Avx.Add(tmp11, tmp12);
tmp2 = Avx.Subtract(tmp11, tmp12);
Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), mm256_F_0_3826);
Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_0_5411, tmp10); // Odd part
Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_1_3065, tmp12); Vector256<float> tmp4 = block.V1;
Vector256<float> z3 = Avx.Multiply(tmp11, mm256_F_0_7071); Vector256<float> tmp5 = block.V3;
Vector256<float> tmp6 = block.V5;
Vector256<float> z11 = Avx.Add(tmp7, z3); Vector256<float> tmp7 = block.V7;
Vector256<float> z13 = Avx.Subtract(tmp7, z3);
Vector256<float> z13 = Avx.Add(tmp6, tmp5);
block.V5 = Avx.Add(z13, z2); Vector256<float> z10 = Avx.Subtract(tmp6, tmp5);
block.V3 = Avx.Subtract(z13, z2); Vector256<float> z11 = Avx.Add(tmp4, tmp7);
block.V1 = Avx.Add(z11, z4); Vector256<float> z12 = Avx.Subtract(tmp4, tmp7);
block.V7 = Avx.Subtract(z11, z4);
} tmp7 = Avx.Add(z11, z13);
tmp11 = Avx.Multiply(Avx.Subtract(z11, z13), mm256_F_1_4142);
/// <summary>
/// Combined operation of <see cref="IDCT8x4_LeftPart(ref Block8x8F, ref Block8x8F)"/> and <see cref="IDCT8x4_RightPart(ref Block8x8F, ref Block8x8F)"/> z5 = Avx.Multiply(Avx.Add(z10, z12), mm256_F_1_8477);
/// using AVX commands.
/// </summary> tmp10 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z12, mm256_F_n1_0823);
/// <param name="s">Source</param> tmp12 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z10, mm256_F_n2_6131);
/// <param name="d">Destination</param>
public static void IDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d) tmp6 = Avx.Subtract(tmp12, tmp7);
{ tmp5 = Avx.Subtract(tmp11, tmp6);
Debug.Assert(Avx.IsSupported, "AVX is required to execute this method"); tmp4 = Avx.Subtract(tmp10, tmp5);
Vector256<float> my1 = s.V1; block.V0 = Avx.Add(tmp0, tmp7);
Vector256<float> my7 = s.V7; block.V7 = Avx.Subtract(tmp0, tmp7);
Vector256<float> mz0 = Avx.Add(my1, my7); block.V1 = Avx.Add(tmp1, tmp6);
block.V6 = Avx.Subtract(tmp1, tmp6);
Vector256<float> my3 = s.V3; block.V2 = Avx.Add(tmp2, tmp5);
Vector256<float> mz2 = Avx.Add(my3, my7); block.V5 = Avx.Subtract(tmp2, tmp5);
Vector256<float> my5 = s.V5; block.V3 = Avx.Add(tmp3, tmp4);
Vector256<float> mz1 = Avx.Add(my3, my5); block.V4 = Avx.Subtract(tmp3, tmp4);
Vector256<float> mz3 = Avx.Add(my1, my5); }
Vector256<float> mz4 = Avx.Multiply(Avx.Add(mz0, mz1), mm256_F_1_1758);
mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz2, mm256_F_n1_9615);
mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz3, mm256_F_n0_3901);
mz0 = Avx.Multiply(mz0, mm256_F_n0_8999);
mz1 = Avx.Multiply(mz1, mm256_F_n2_5629);
Vector256<float> mb3 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my7, mm256_F_0_2986), mz2);
Vector256<float> mb2 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my5, mm256_F_2_0531), mz3);
Vector256<float> mb1 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my3, mm256_F_3_0727), mz2);
Vector256<float> mb0 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my1, mm256_F_1_5013), mz3);
Vector256<float> my2 = s.V2;
Vector256<float> my6 = s.V6;
mz4 = Avx.Multiply(Avx.Add(my2, my6), mm256_F_0_5411);
Vector256<float> my0 = s.V0;
Vector256<float> my4 = s.V4;
mz0 = Avx.Add(my0, my4);
mz1 = Avx.Subtract(my0, my4);
mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my6, mm256_F_n1_8477);
mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my2, mm256_F_0_7653);
my0 = Avx.Add(mz0, mz3);
my3 = Avx.Subtract(mz0, mz3);
my1 = Avx.Add(mz1, mz2);
my2 = Avx.Subtract(mz1, mz2);
d.V0 = Avx.Add(my0, mb0);
d.V7 = Avx.Subtract(my0, mb0);
d.V1 = Avx.Add(my1, mb1);
d.V6 = Avx.Subtract(my1, mb1);
d.V2 = Avx.Add(my2, mb2);
d.V5 = Avx.Subtract(my2, mb2);
d.V3 = Avx.Add(my3, mb3);
d.V4 = Avx.Subtract(my3, mb3);
} }
} }
} }

532
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

@ -3,6 +3,7 @@
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics.X86; using System.Runtime.Intrinsics.X86;
#endif #endif
@ -15,102 +16,202 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// </summary> /// </summary>
internal static partial class FastFloatingPointDCT internal static partial class FastFloatingPointDCT
{ {
#pragma warning disable SA1310 // FieldNamesMustNotContainUnderscore #pragma warning disable SA1310, SA1311, IDE1006 // naming rules violation warnings
private const float C_1_175876 = 1.175875602f; private static readonly Vector4 mm128_F_0_7071 = new(0.707106781f);
private const float C_1_961571 = -1.961570560f; private static readonly Vector4 mm128_F_0_3826 = new(0.382683433f);
private const float C_0_390181 = -0.390180644f; private static readonly Vector4 mm128_F_0_5411 = new(0.541196100f);
private const float C_0_899976 = -0.899976223f; private static readonly Vector4 mm128_F_1_3065 = new(1.306562965f);
private const float C_2_562915 = -2.562915447f;
private const float C_0_298631 = 0.298631336f; private static readonly Vector4 mm128_F_1_4142 = new(1.414213562f);
private const float C_2_053120 = 2.053119869f; private static readonly Vector4 mm128_F_1_8477 = new(1.847759065f);
private const float C_3_072711 = 3.072711026f; private static readonly Vector4 mm128_F_n1_0823 = new(-1.082392200f);
private const float C_1_501321 = 1.501321110f; private static readonly Vector4 mm128_F_n2_6131 = new(-2.613125930f);
private const float C_0_541196 = 0.541196100f; #pragma warning restore SA1310, SA1311, IDE1006
private const float C_1_847759 = -1.847759065f;
private const float C_0_765367 = 0.765366865f;
private const float C_0_125 = 0.1250f;
#pragma warning disable SA1311, IDE1006 // naming rules violation warnings
private static readonly Vector4 mm128_F_0_7071 = new Vector4(0.707106781f);
private static readonly Vector4 mm128_F_0_3826 = new Vector4(0.382683433f);
private static readonly Vector4 mm128_F_0_5411 = new Vector4(0.541196100f);
private static readonly Vector4 mm128_F_1_3065 = new Vector4(1.306562965f);
#pragma warning restore SA1311, IDE1006
#pragma warning restore SA1310 // FieldNamesMustNotContainUnderscore
/// <summary> /// <summary>
/// Gets reciprocal coefficients for jpeg quantization tables calculation. /// Gets adjustment table for quantization tables.
/// </summary> /// </summary>
/// <remarks> /// <remarks>
/// <para> /// <para>
/// Current FDCT implementation expects its results to be multiplied by /// Current IDCT and FDCT implementations are based on Arai, Agui,
/// a reciprocal quantization table. To get 8x8 reciprocal block values in this /// and Nakajima's algorithm. Both DCT methods do not
/// table must be divided by quantization table values scaled with quality settings. /// produce finished DCT output, final step is fused into the
/// quantization step. Quantization and de-quantization coefficients
/// must be multiplied by these values.
/// </para> /// </para>
/// <para> /// <para>
/// These values were calculated with this formula: /// Given values were generated by the formula:
/// <code>
/// value[row * 8 + col] = scalefactor[row] * scalefactor[col] * 8;
/// </code>
/// Where:
/// <code> /// <code>
/// scalefactor[row] * scalefactor[col], where
/// scalefactor[0] = 1 /// scalefactor[0] = 1
/// </code>
/// <code>
/// scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 /// scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
/// </code> /// </code>
/// Values are also scaled by 8 so DCT code won't do extra division/multiplication.
/// </para> /// </para>
/// </remarks> /// </remarks>
internal static readonly float[] DctReciprocalAdjustmentCoefficients = new float[] private static readonly float[] AdjustmentCoefficients = new float[]
{ {
0.125f, 0.09011998f, 0.09567086f, 0.10630376f, 0.125f, 0.15909483f, 0.23096988f, 0.45306373f, 1f, 1.3870399f, 1.306563f, 1.1758755f, 1f, 0.78569496f, 0.5411961f, 0.27589938f,
0.09011998f, 0.064972885f, 0.068974845f, 0.07664074f, 0.09011998f, 0.11470097f, 0.16652f, 0.32664075f, 1.3870399f, 1.9238797f, 1.812255f, 1.6309863f, 1.3870399f, 1.0897902f, 0.7506606f, 0.38268346f,
0.09567086f, 0.068974845f, 0.07322331f, 0.081361376f, 0.09567086f, 0.121765904f, 0.17677669f, 0.34675997f, 1.306563f, 1.812255f, 1.707107f, 1.5363555f, 1.306563f, 1.02656f, 0.7071068f, 0.36047992f,
0.10630376f, 0.07664074f, 0.081361376f, 0.09040392f, 0.10630376f, 0.13529903f, 0.19642374f, 0.38529903f, 1.1758755f, 1.6309863f, 1.5363555f, 1.3826833f, 1.1758755f, 0.9238795f, 0.63637924f, 0.32442334f,
0.125f, 0.09011998f, 0.09567086f, 0.10630376f, 0.125f, 0.15909483f, 0.23096988f, 0.45306373f, 1f, 1.3870399f, 1.306563f, 1.1758755f, 1f, 0.78569496f, 0.5411961f, 0.27589938f,
0.15909483f, 0.11470097f, 0.121765904f, 0.13529903f, 0.15909483f, 0.2024893f, 0.2939689f, 0.5766407f, 0.78569496f, 1.0897902f, 1.02656f, 0.9238795f, 0.78569496f, 0.61731654f, 0.42521507f, 0.21677275f,
0.23096988f, 0.16652f, 0.17677669f, 0.19642374f, 0.23096988f, 0.2939689f, 0.4267767f, 0.8371526f, 0.5411961f, 0.7506606f, 0.7071068f, 0.63637924f, 0.5411961f, 0.42521507f, 0.29289323f, 0.14931567f,
0.45306373f, 0.32664075f, 0.34675997f, 0.38529903f, 0.45306373f, 0.5766407f, 0.8371526f, 1.642134f, 0.27589938f, 0.38268346f, 0.36047992f, 0.32442334f, 0.27589938f, 0.21677275f, 0.14931567f, 0.076120466f,
}; };
/// <summary> /// <summary>
/// Adjusts given quantization table to be compliant with FDCT implementation. /// Adjusts given quantization table for usage with <see cref="TransformIDCT"/>.
/// </summary>
/// <param name="quantTable">Quantization table to adjust.</param>
public static void AdjustToIDCT(ref Block8x8F quantTable)
{
    // Treat both the table and the adjustment coefficients as flat float sequences.
    ref float firstEntry = ref Unsafe.As<Block8x8F, float>(ref quantTable);
    ref float firstCoefficient = ref MemoryMarshal.GetReference<float>(AdjustmentCoefficients);

    nint index = 0;
    while (index < Block8x8F.Size)
    {
        // Scale the entry by 1/8 and by the coefficient stored at the same index.
        ref float entry = ref Unsafe.Add(ref firstEntry, index);
        entry = 0.125f * entry * Unsafe.Add(ref firstCoefficient, index);
        index++;
    }

    // Spectral macroblocks are transposed before quantization,
    // so the quantization table has to be transposed as well.
    quantTable.TransposeInplace();
}
/// <summary>
/// Prepares the given quantization table for use with <see cref="TransformFDCT"/>.
/// </summary>
/// <remarks>
/// Every entry is replaced in place by <c>0.125f / (entry * coefficient)</c>,
/// where <c>coefficient</c> is the adjustment coefficient stored at the same index.
/// </remarks>
/// <param name="quantTable">Quantization table to adjust.</param>
public static void AdjustToFDCT(ref Block8x8F quantTable)
{
    // Treat both the table and the adjustment coefficients as flat float sequences.
    ref float firstEntry = ref Unsafe.As<Block8x8F, float>(ref quantTable);
    ref float firstCoefficient = ref MemoryMarshal.GetReference<float>(AdjustmentCoefficients);

    nint index = 0;
    while (index < Block8x8F.Size)
    {
        // Store the scaled reciprocal of the adjusted entry.
        ref float entry = ref Unsafe.Add(ref firstEntry, index);
        entry = 0.125f / (entry * Unsafe.Add(ref firstCoefficient, index));
        index++;
    }
}
/// <summary>
/// Apply 2D floating point IDCT inplace.
/// </summary> /// </summary>
/// <remarks> /// <remarks>
/// See <see cref="DctReciprocalAdjustmentCoefficients"/> docs for explanation. /// Input block must be dequantized before this method with table
/// adjusted by <see cref="AdjustToIDCT"/>.
/// </remarks> /// </remarks>
/// <param name="quantizationtable">Quantization table to adjust.</param> /// <param name="block">Input block.</param>
public static void AdjustToFDCT(ref Block8x8F quantizationtable) public static void TransformIDCT(ref Block8x8F block)
{ {
for (int i = 0; i < Block8x8F.Size; i++) #if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{ {
quantizationtable[i] = DctReciprocalAdjustmentCoefficients[i] / quantizationtable[i]; IDCT8x8_Avx(ref block);
}
else
#endif
{
IDCT_Vector4(ref block);
} }
} }
/// <summary> /// <summary>
/// Apply 2D floating point FDCT inplace. /// Apply 2D floating point FDCT inplace.
/// </summary> /// </summary>
/// <param name="block">Input matrix.</param> /// <remarks>
/// Input block must be quantized after this method with table adjusted
/// by <see cref="AdjustToFDCT"/>.
/// </remarks>
/// <param name="block">Input block.</param>
public static void TransformFDCT(ref Block8x8F block) public static void TransformFDCT(ref Block8x8F block)
{ {
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported) if (Avx.IsSupported)
{ {
ForwardTransform_Avx(ref block); FDCT8x8_Avx(ref block);
} }
else else
#endif #endif
if (Vector.IsHardwareAccelerated) if (Vector.IsHardwareAccelerated)
{ {
ForwardTransform_Vector4(ref block); FDCT_Vector4(ref block);
} }
else else
{ {
ForwardTransform_Scalar(ref block); FDCT_Scalar(ref block);
}
}
/// <summary>
/// Apply floating point IDCT inplace using <see cref="Vector4"/> API.
/// </summary>
/// <remarks>
/// The transform runs as two passes of the same 1D routine: once on the block as given,
/// then once more after the block has been transposed in place. Each pass handles the
/// block as two halves, starting at <c>V0L</c> and at <c>V0R</c>.
/// NOTE(review): the parameter name suggests the block arrives already transposed - confirm against callers.
/// </remarks>
/// <param name="transposedBlock">Input block; overwritten with the result.</param>
private static void IDCT_Vector4(ref Block8x8F transposedBlock)
{
// Debug-only check; the message text implies a scalar path exists for non-accelerated hardware.
// NOTE(review): confirm every caller checks Vector.IsHardwareAccelerated before calling this.
DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware.");
// First pass - process columns
IDCT8x4_Vector4(ref transposedBlock.V0L);
IDCT8x4_Vector4(ref transposedBlock.V0R);
// Second pass - process rows
transposedBlock.TransposeInplace();
IDCT8x4_Vector4(ref transposedBlock.V0L);
IDCT8x4_Vector4(ref transposedBlock.V0R);
// Applies 1D floating point IDCT inplace on 8x4 part of 8x8 block
// vecRef is the first Vector4 of the half being processed. Elements are addressed with a
// stride of 2 Vector4 values (8 floats), presumably one full block row - TODO confirm layout.
static void IDCT8x4_Vector4(ref Vector4 vecRef)
{
// Even part
// Loads the vectors at positions 0, 2, 4 and 6.
Vector4 tmp0 = Unsafe.Add(ref vecRef, 0 * 2);
Vector4 tmp1 = Unsafe.Add(ref vecRef, 2 * 2);
Vector4 tmp2 = Unsafe.Add(ref vecRef, 4 * 2);
Vector4 tmp3 = Unsafe.Add(ref vecRef, 6 * 2);
// z5 is only a copy of tmp0 here; the variable is reused in the odd part below.
Vector4 z5 = tmp0;
Vector4 tmp10 = z5 + tmp2;
Vector4 tmp11 = z5 - tmp2;
Vector4 tmp13 = tmp1 + tmp3;
Vector4 tmp12 = ((tmp1 - tmp3) * mm128_F_1_4142) - tmp13;
// tmp0..tmp3 are overwritten with the even-part results consumed by the final stores.
tmp0 = tmp10 + tmp13;
tmp3 = tmp10 - tmp13;
tmp1 = tmp11 + tmp12;
tmp2 = tmp11 - tmp12;
// Odd part
// Loads the vectors at positions 1, 3, 5 and 7.
Vector4 tmp4 = Unsafe.Add(ref vecRef, 1 * 2);
Vector4 tmp5 = Unsafe.Add(ref vecRef, 3 * 2);
Vector4 tmp6 = Unsafe.Add(ref vecRef, 5 * 2);
Vector4 tmp7 = Unsafe.Add(ref vecRef, 7 * 2);
Vector4 z13 = tmp6 + tmp5;
Vector4 z10 = tmp6 - tmp5;
Vector4 z11 = tmp4 + tmp7;
Vector4 z12 = tmp4 - tmp7;
// tmp10..tmp12 are reused as scratch values from here on.
tmp7 = z11 + z13;
tmp11 = (z11 - z13) * mm128_F_1_4142;
z5 = (z10 + z12) * mm128_F_1_8477;
tmp10 = (z12 * mm128_F_n1_0823) + z5;
tmp12 = (z10 * mm128_F_n2_6131) + z5;
// Each statement depends on the one before it, so this order must be kept.
tmp6 = tmp12 - tmp7;
tmp5 = tmp11 - tmp6;
tmp4 = tmp10 - tmp5;
// Final stage: every output pair is the sum and the difference of one even and one odd term.
Unsafe.Add(ref vecRef, 0 * 2) = tmp0 + tmp7;
Unsafe.Add(ref vecRef, 7 * 2) = tmp0 - tmp7;
Unsafe.Add(ref vecRef, 1 * 2) = tmp1 + tmp6;
Unsafe.Add(ref vecRef, 6 * 2) = tmp1 - tmp6;
Unsafe.Add(ref vecRef, 2 * 2) = tmp2 + tmp5;
Unsafe.Add(ref vecRef, 5 * 2) = tmp2 - tmp5;
Unsafe.Add(ref vecRef, 3 * 2) = tmp3 + tmp4;
Unsafe.Add(ref vecRef, 4 * 2) = tmp3 - tmp4;
} }
} }
@ -120,8 +221,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <remarks> /// <remarks>
/// Ported from libjpeg-turbo https://github.com/libjpeg-turbo/libjpeg-turbo/blob/main/jfdctflt.c. /// Ported from libjpeg-turbo https://github.com/libjpeg-turbo/libjpeg-turbo/blob/main/jfdctflt.c.
/// </remarks> /// </remarks>
/// <param name="block">Input matrix.</param> /// <param name="block">Input block.</param>
private static void ForwardTransform_Scalar(ref Block8x8F block) private static void FDCT_Scalar(ref Block8x8F block)
{ {
const int dctSize = 8; const int dctSize = 8;
@ -130,17 +231,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
float z1, z2, z3, z4, z5, z11, z13; float z1, z2, z3, z4, z5, z11, z13;
// First pass - process rows // First pass - process rows
ref float dataRef = ref Unsafe.As<Block8x8F, float>(ref block); ref float blockRef = ref Unsafe.As<Block8x8F, float>(ref block);
for (int ctr = 7; ctr >= 0; ctr--) for (int ctr = 7; ctr >= 0; ctr--)
{ {
tmp0 = Unsafe.Add(ref dataRef, 0) + Unsafe.Add(ref dataRef, 7); tmp0 = Unsafe.Add(ref blockRef, 0) + Unsafe.Add(ref blockRef, 7);
tmp7 = Unsafe.Add(ref dataRef, 0) - Unsafe.Add(ref dataRef, 7); tmp7 = Unsafe.Add(ref blockRef, 0) - Unsafe.Add(ref blockRef, 7);
tmp1 = Unsafe.Add(ref dataRef, 1) + Unsafe.Add(ref dataRef, 6); tmp1 = Unsafe.Add(ref blockRef, 1) + Unsafe.Add(ref blockRef, 6);
tmp6 = Unsafe.Add(ref dataRef, 1) - Unsafe.Add(ref dataRef, 6); tmp6 = Unsafe.Add(ref blockRef, 1) - Unsafe.Add(ref blockRef, 6);
tmp2 = Unsafe.Add(ref dataRef, 2) + Unsafe.Add(ref dataRef, 5); tmp2 = Unsafe.Add(ref blockRef, 2) + Unsafe.Add(ref blockRef, 5);
tmp5 = Unsafe.Add(ref dataRef, 2) - Unsafe.Add(ref dataRef, 5); tmp5 = Unsafe.Add(ref blockRef, 2) - Unsafe.Add(ref blockRef, 5);
tmp3 = Unsafe.Add(ref dataRef, 3) + Unsafe.Add(ref dataRef, 4); tmp3 = Unsafe.Add(ref blockRef, 3) + Unsafe.Add(ref blockRef, 4);
tmp4 = Unsafe.Add(ref dataRef, 3) - Unsafe.Add(ref dataRef, 4); tmp4 = Unsafe.Add(ref blockRef, 3) - Unsafe.Add(ref blockRef, 4);
// Even part // Even part
tmp10 = tmp0 + tmp3; tmp10 = tmp0 + tmp3;
@ -148,12 +249,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
tmp11 = tmp1 + tmp2; tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2; tmp12 = tmp1 - tmp2;
Unsafe.Add(ref dataRef, 0) = tmp10 + tmp11; Unsafe.Add(ref blockRef, 0) = tmp10 + tmp11;
Unsafe.Add(ref dataRef, 4) = tmp10 - tmp11; Unsafe.Add(ref blockRef, 4) = tmp10 - tmp11;
z1 = (tmp12 + tmp13) * 0.707106781f; z1 = (tmp12 + tmp13) * 0.707106781f;
Unsafe.Add(ref dataRef, 2) = tmp13 + z1; Unsafe.Add(ref blockRef, 2) = tmp13 + z1;
Unsafe.Add(ref dataRef, 6) = tmp13 - z1; Unsafe.Add(ref blockRef, 6) = tmp13 - z1;
// Odd part // Odd part
tmp10 = tmp4 + tmp5; tmp10 = tmp4 + tmp5;
@ -168,26 +269,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
z11 = tmp7 + z3; z11 = tmp7 + z3;
z13 = tmp7 - z3; z13 = tmp7 - z3;
Unsafe.Add(ref dataRef, 5) = z13 + z2; Unsafe.Add(ref blockRef, 5) = z13 + z2;
Unsafe.Add(ref dataRef, 3) = z13 - z2; Unsafe.Add(ref blockRef, 3) = z13 - z2;
Unsafe.Add(ref dataRef, 1) = z11 + z4; Unsafe.Add(ref blockRef, 1) = z11 + z4;
Unsafe.Add(ref dataRef, 7) = z11 - z4; Unsafe.Add(ref blockRef, 7) = z11 - z4;
dataRef = ref Unsafe.Add(ref dataRef, dctSize); blockRef = ref Unsafe.Add(ref blockRef, dctSize);
} }
// Second pass - process columns // Second pass - process columns
dataRef = ref Unsafe.As<Block8x8F, float>(ref block); blockRef = ref Unsafe.As<Block8x8F, float>(ref block);
for (int ctr = 7; ctr >= 0; ctr--) for (int ctr = 7; ctr >= 0; ctr--)
{ {
tmp0 = Unsafe.Add(ref dataRef, dctSize * 0) + Unsafe.Add(ref dataRef, dctSize * 7); tmp0 = Unsafe.Add(ref blockRef, dctSize * 0) + Unsafe.Add(ref blockRef, dctSize * 7);
tmp7 = Unsafe.Add(ref dataRef, dctSize * 0) - Unsafe.Add(ref dataRef, dctSize * 7); tmp7 = Unsafe.Add(ref blockRef, dctSize * 0) - Unsafe.Add(ref blockRef, dctSize * 7);
tmp1 = Unsafe.Add(ref dataRef, dctSize * 1) + Unsafe.Add(ref dataRef, dctSize * 6); tmp1 = Unsafe.Add(ref blockRef, dctSize * 1) + Unsafe.Add(ref blockRef, dctSize * 6);
tmp6 = Unsafe.Add(ref dataRef, dctSize * 1) - Unsafe.Add(ref dataRef, dctSize * 6); tmp6 = Unsafe.Add(ref blockRef, dctSize * 1) - Unsafe.Add(ref blockRef, dctSize * 6);
tmp2 = Unsafe.Add(ref dataRef, dctSize * 2) + Unsafe.Add(ref dataRef, dctSize * 5); tmp2 = Unsafe.Add(ref blockRef, dctSize * 2) + Unsafe.Add(ref blockRef, dctSize * 5);
tmp5 = Unsafe.Add(ref dataRef, dctSize * 2) - Unsafe.Add(ref dataRef, dctSize * 5); tmp5 = Unsafe.Add(ref blockRef, dctSize * 2) - Unsafe.Add(ref blockRef, dctSize * 5);
tmp3 = Unsafe.Add(ref dataRef, dctSize * 3) + Unsafe.Add(ref dataRef, dctSize * 4); tmp3 = Unsafe.Add(ref blockRef, dctSize * 3) + Unsafe.Add(ref blockRef, dctSize * 4);
tmp4 = Unsafe.Add(ref dataRef, dctSize * 3) - Unsafe.Add(ref dataRef, dctSize * 4); tmp4 = Unsafe.Add(ref blockRef, dctSize * 3) - Unsafe.Add(ref blockRef, dctSize * 4);
// Even part // Even part
tmp10 = tmp0 + tmp3; tmp10 = tmp0 + tmp3;
@ -195,12 +296,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
tmp11 = tmp1 + tmp2; tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2; tmp12 = tmp1 - tmp2;
Unsafe.Add(ref dataRef, dctSize * 0) = tmp10 + tmp11; Unsafe.Add(ref blockRef, dctSize * 0) = tmp10 + tmp11;
Unsafe.Add(ref dataRef, dctSize * 4) = tmp10 - tmp11; Unsafe.Add(ref blockRef, dctSize * 4) = tmp10 - tmp11;
z1 = (tmp12 + tmp13) * 0.707106781f; z1 = (tmp12 + tmp13) * 0.707106781f;
Unsafe.Add(ref dataRef, dctSize * 2) = tmp13 + z1; Unsafe.Add(ref blockRef, dctSize * 2) = tmp13 + z1;
Unsafe.Add(ref dataRef, dctSize * 6) = tmp13 - z1; Unsafe.Add(ref blockRef, dctSize * 6) = tmp13 - z1;
// Odd part // Odd part
tmp10 = tmp4 + tmp5; tmp10 = tmp4 + tmp5;
@ -215,12 +316,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
z11 = tmp7 + z3; z11 = tmp7 + z3;
z13 = tmp7 - z3; z13 = tmp7 - z3;
Unsafe.Add(ref dataRef, dctSize * 5) = z13 + z2; Unsafe.Add(ref blockRef, dctSize * 5) = z13 + z2;
Unsafe.Add(ref dataRef, dctSize * 3) = z13 - z2; Unsafe.Add(ref blockRef, dctSize * 3) = z13 - z2;
Unsafe.Add(ref dataRef, dctSize * 1) = z11 + z4; Unsafe.Add(ref blockRef, dctSize * 1) = z11 + z4;
Unsafe.Add(ref dataRef, dctSize * 7) = z11 - z4; Unsafe.Add(ref blockRef, dctSize * 7) = z11 - z4;
dataRef = ref Unsafe.Add(ref dataRef, 1); blockRef = ref Unsafe.Add(ref blockRef, 1);
} }
} }
@ -230,11 +331,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <remarks> /// <remarks>
/// This implementation must be called only if hardware supports 4 /// This implementation must be called only if hardware supports 4
/// floating point numbers vector. Otherwise explicit scalar /// floating point numbers vector. Otherwise explicit scalar
/// implementation <see cref="ForwardTransform_Scalar"/> is faster /// implementation <see cref="FDCT_Scalar"/> is faster
/// because it does not rely on matrix transposition. /// because it does not rely on block transposition.
/// </remarks> /// </remarks>
/// <param name="block">Input matrix.</param> /// <param name="block">Input block.</param>
private static void ForwardTransform_Vector4(ref Block8x8F block) public static void FDCT_Vector4(ref Block8x8F block)
{ {
DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware."); DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware.");
@ -247,209 +348,50 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
block.TransposeInplace(); block.TransposeInplace();
FDCT8x4_Vector4(ref block.V0L); FDCT8x4_Vector4(ref block.V0L);
FDCT8x4_Vector4(ref block.V0R); FDCT8x4_Vector4(ref block.V0R);
}
/// <summary> // Applies 1D floating point FDCT inplace on 8x4 part of 8x8 block
/// Apply 1D floating point FDCT inplace on 8x4 part of 8x8 matrix. static void FDCT8x4_Vector4(ref Vector4 vecRef)
/// </summary> {
/// <remarks> Vector4 tmp0 = Unsafe.Add(ref vecRef, 0) + Unsafe.Add(ref vecRef, 14);
/// Implemented using Vector4 API operations for either scalar or sse hardware implementation. Vector4 tmp7 = Unsafe.Add(ref vecRef, 0) - Unsafe.Add(ref vecRef, 14);
/// Must be called on both 8x4 matrix parts for the full FDCT transform. Vector4 tmp1 = Unsafe.Add(ref vecRef, 2) + Unsafe.Add(ref vecRef, 12);
/// </remarks> Vector4 tmp6 = Unsafe.Add(ref vecRef, 2) - Unsafe.Add(ref vecRef, 12);
/// <param name="blockRef">Input reference to the first </param> Vector4 tmp2 = Unsafe.Add(ref vecRef, 4) + Unsafe.Add(ref vecRef, 10);
private static void FDCT8x4_Vector4(ref Vector4 blockRef) Vector4 tmp5 = Unsafe.Add(ref vecRef, 4) - Unsafe.Add(ref vecRef, 10);
{ Vector4 tmp3 = Unsafe.Add(ref vecRef, 6) + Unsafe.Add(ref vecRef, 8);
Vector4 tmp0 = Unsafe.Add(ref blockRef, 0) + Unsafe.Add(ref blockRef, 14); Vector4 tmp4 = Unsafe.Add(ref vecRef, 6) - Unsafe.Add(ref vecRef, 8);
Vector4 tmp7 = Unsafe.Add(ref blockRef, 0) - Unsafe.Add(ref blockRef, 14);
Vector4 tmp1 = Unsafe.Add(ref blockRef, 2) + Unsafe.Add(ref blockRef, 12);
Vector4 tmp6 = Unsafe.Add(ref blockRef, 2) - Unsafe.Add(ref blockRef, 12);
Vector4 tmp2 = Unsafe.Add(ref blockRef, 4) + Unsafe.Add(ref blockRef, 10);
Vector4 tmp5 = Unsafe.Add(ref blockRef, 4) - Unsafe.Add(ref blockRef, 10);
Vector4 tmp3 = Unsafe.Add(ref blockRef, 6) + Unsafe.Add(ref blockRef, 8);
Vector4 tmp4 = Unsafe.Add(ref blockRef, 6) - Unsafe.Add(ref blockRef, 8);
// Even part
Vector4 tmp10 = tmp0 + tmp3;
Vector4 tmp13 = tmp0 - tmp3;
Vector4 tmp11 = tmp1 + tmp2;
Vector4 tmp12 = tmp1 - tmp2;
Unsafe.Add(ref blockRef, 0) = tmp10 + tmp11;
Unsafe.Add(ref blockRef, 8) = tmp10 - tmp11;
Vector4 z1 = (tmp12 + tmp13) * mm128_F_0_7071;
Unsafe.Add(ref blockRef, 4) = tmp13 + z1;
Unsafe.Add(ref blockRef, 12) = tmp13 - z1;
// Odd part
tmp10 = tmp4 + tmp5;
tmp11 = tmp5 + tmp6;
tmp12 = tmp6 + tmp7;
Vector4 z5 = (tmp10 - tmp12) * mm128_F_0_3826;
Vector4 z2 = (mm128_F_0_5411 * tmp10) + z5;
Vector4 z4 = (mm128_F_1_3065 * tmp12) + z5;
Vector4 z3 = tmp11 * mm128_F_0_7071;
Vector4 z11 = tmp7 + z3;
Vector4 z13 = tmp7 - z3;
Unsafe.Add(ref blockRef, 10) = z13 + z2;
Unsafe.Add(ref blockRef, 6) = z13 - z2;
Unsafe.Add(ref blockRef, 2) = z11 + z4;
Unsafe.Add(ref blockRef, 14) = z11 - z4;
}
/// <summary> // Even part
/// Apply floating point IDCT inplace. Vector4 tmp10 = tmp0 + tmp3;
/// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239. Vector4 tmp13 = tmp0 - tmp3;
/// </summary> Vector4 tmp11 = tmp1 + tmp2;
/// <param name="block">Input matrix.</param> Vector4 tmp12 = tmp1 - tmp2;
/// <param name="temp">Matrix to store temporal results.</param>
public static void TransformIDCT(ref Block8x8F block, ref Block8x8F temp)
{
block.TransposeInplace();
IDCT8x8(ref block, ref temp);
temp.TransposeInplace();
IDCT8x8(ref temp, ref block);
// TODO: This can be fused into quantization table step Unsafe.Add(ref vecRef, 0) = tmp10 + tmp11;
block.MultiplyInPlace(C_0_125); Unsafe.Add(ref vecRef, 8) = tmp10 - tmp11;
}
/// <summary> Vector4 z1 = (tmp12 + tmp13) * mm128_F_0_7071;
/// Performs 8x8 matrix Inverse Discrete Cosine Transform Unsafe.Add(ref vecRef, 4) = tmp13 + z1;
/// </summary> Unsafe.Add(ref vecRef, 12) = tmp13 - z1;
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
private static void IDCT8x8(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
IDCT8x8_Avx(ref s, ref d);
}
else
#endif
{
IDCT8x4_LeftPart(ref s, ref d);
IDCT8x4_RightPart(ref s, ref d);
}
}
/// <summary> // Odd part
/// Do IDCT internal operations on the left part of the block. Original src: tmp10 = tmp4 + tmp5;
/// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 tmp11 = tmp5 + tmp6;
/// </summary> tmp12 = tmp6 + tmp7;
/// <param name="s">The source block</param>
/// <param name="d">Destination block</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void IDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d)
{
// Reads only the "L" vectors (V0L..V7L) of the source block s and writes only
// the "L" vectors of the destination block d; s itself is not modified here.
// Odd part: combines source vectors 1, 3, 5 and 7.
Vector4 my1 = s.V1L;
Vector4 my7 = s.V7L;
Vector4 mz0 = my1 + my7;
Vector4 my3 = s.V3L;
Vector4 mz2 = my3 + my7;
Vector4 my5 = s.V5L;
Vector4 mz1 = my3 + my5;
Vector4 mz3 = my1 + my5;
// Several C_* constants are declared with negative values although their names
// carry no sign (for example C_1_961571 is -1.961570560f).
Vector4 mz4 = (mz0 + mz1) * C_1_175876;
mz2 = (mz2 * C_1_961571) + mz4;
mz3 = (mz3 * C_0_390181) + mz4;
mz0 = mz0 * C_0_899976;
mz1 = mz1 * C_2_562915;
// mb0..mb3 are the odd-part terms used by the final stores.
Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2;
Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3;
Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2;
Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3;
// Even part: combines source vectors 0, 2, 4 and 6; the mz* variables are reused as scratch.
Vector4 my2 = s.V2L;
Vector4 my6 = s.V6L;
mz4 = (my2 + my6) * C_0_541196;
Vector4 my0 = s.V0L;
Vector4 my4 = s.V4L;
mz0 = my0 + my4;
mz1 = my0 - my4;
mz2 = mz4 + (my6 * C_1_847759);
mz3 = mz4 + (my2 * C_0_765367);
// my0..my3 are overwritten with the even-part terms used by the final stores.
my0 = mz0 + mz3;
my3 = mz0 - mz3;
my1 = mz1 + mz2;
my2 = mz1 - mz2;
// Each destination pair (k, 7 - k) is the sum and the difference of one even and one odd term.
d.V0L = my0 + mb0;
d.V7L = my0 - mb0;
d.V1L = my1 + mb1;
d.V6L = my1 - mb1;
d.V2L = my2 + mb2;
d.V5L = my2 - mb2;
d.V3L = my3 + mb3;
d.V4L = my3 - mb3;
}
/// <summary> Vector4 z5 = (tmp10 - tmp12) * mm128_F_0_3826;
/// Do IDCT internal operations on the right part of the block. Vector4 z2 = (mm128_F_0_5411 * tmp10) + z5;
/// Original src: Vector4 z4 = (mm128_F_1_3065 * tmp12) + z5;
/// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 Vector4 z3 = tmp11 * mm128_F_0_7071;
/// </summary>
/// <param name="s">The source block</param> Vector4 z11 = tmp7 + z3;
/// <param name="d">The destination block</param> Vector4 z13 = tmp7 - z3;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void IDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d) Unsafe.Add(ref vecRef, 10) = z13 + z2;
{ Unsafe.Add(ref vecRef, 6) = z13 - z2;
Vector4 my1 = s.V1R; Unsafe.Add(ref vecRef, 2) = z11 + z4;
Vector4 my7 = s.V7R; Unsafe.Add(ref vecRef, 14) = z11 - z4;
Vector4 mz0 = my1 + my7; }
Vector4 my3 = s.V3R;
Vector4 mz2 = my3 + my7;
Vector4 my5 = s.V5R;
Vector4 mz1 = my3 + my5;
Vector4 mz3 = my1 + my5;
Vector4 mz4 = (mz0 + mz1) * C_1_175876;
mz2 = (mz2 * C_1_961571) + mz4;
mz3 = (mz3 * C_0_390181) + mz4;
mz0 = mz0 * C_0_899976;
mz1 = mz1 * C_2_562915;
Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2;
Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3;
Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2;
Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3;
Vector4 my2 = s.V2R;
Vector4 my6 = s.V6R;
mz4 = (my2 + my6) * C_0_541196;
Vector4 my0 = s.V0R;
Vector4 my4 = s.V4R;
mz0 = my0 + my4;
mz1 = my0 - my4;
mz2 = mz4 + (my6 * C_1_847759);
mz3 = mz4 + (my2 * C_0_765367);
my0 = mz0 + mz3;
my3 = mz0 - mz3;
my1 = mz1 + mz2;
my2 = mz1 - mz2;
d.V0R = my0 + mb0;
d.V7R = my0 - mb0;
d.V1R = my1 + mb1;
d.V6R = my1 - mb1;
d.V2R = my2 + mb2;
d.V5R = my2 - mb2;
d.V3R = my3 + mb3;
d.V4R = my3 - mb3;
} }
} }
} }

29
src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs

@ -35,5 +35,34 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63 63, 63, 63, 63, 63, 63, 63, 63
}; };
/// <summary>
/// Gets the zig-zag ordering indices with the block transpose step fused in.
/// </summary>
/// <remarks>
/// <para>
/// The first 64 entries are the zig-zag order with row and column swapped:
/// where the plain zig-zag order would give index <c>row * 8 + col</c>, this
/// table gives <c>col * 8 + row</c>.
/// </para>
/// <para>
/// When reading corrupted data, the Huffman decoders could attempt
/// to reference an entry beyond the end of this array (if the decoded
/// zero run length reaches past the end of the block). To prevent
/// wild stores without adding an inner-loop test, we put some extra
/// "63"s after the real entries. This will cause the extra coefficient
/// to be stored in location 63 of the block, not somewhere random.
/// The worst case would be a run-length of 15, which means we need 16
/// fake entries.
/// </para>
/// </remarks>
public static ReadOnlySpan<byte> TransposingOrder => new byte[]
{
// 64 real entries, 8 per line
0, 8, 1, 2, 9, 16, 24, 17,
10, 3, 4, 11, 18, 25, 32, 40,
33, 26, 19, 12, 5, 6, 13, 20,
27, 34, 41, 48, 56, 49, 42, 35,
28, 21, 14, 7, 15, 22, 29, 36,
43, 50, 57, 58, 51, 44, 37, 30,
23, 31, 38, 45, 52, 59, 60, 53,
46, 39, 47, 54, 61, 62, 55, 63,
// Extra entries for safety in decoder
// (16 padding entries, all pointing at the last block position)
63, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63
};
} }
} }

3
src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs

@ -942,6 +942,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
break; break;
} }
} }
// Adjusting table for IDCT step during decompression
FastFloatingPointDCT.AdjustToIDCT(ref table);
} }
} }

2
src/ImageSharp/Formats/Webp/EntropyIx.cs

@ -6,7 +6,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
/// <summary> /// <summary>
/// These five modes are evaluated and their respective entropy is computed. /// These five modes are evaluated and their respective entropy is computed.
/// </summary> /// </summary>
internal enum EntropyIx internal enum EntropyIx : byte
{ {
Direct = 0, Direct = 0,

2
src/ImageSharp/Formats/Webp/HistoIx.cs

@ -3,7 +3,7 @@
namespace SixLabors.ImageSharp.Formats.Webp namespace SixLabors.ImageSharp.Formats.Webp
{ {
internal enum HistoIx internal enum HistoIx : byte
{ {
HistoAlpha = 0, HistoAlpha = 0,

49
src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs

@ -2,11 +2,13 @@
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System; using System;
using System.Buffers;
using System.Collections.Generic; using System.Collections.Generic;
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
internal class BackwardReferenceEncoder internal static class BackwardReferenceEncoder
{ {
/// <summary> /// <summary>
/// Maximum bit length. /// Maximum bit length.
@ -41,6 +43,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int quality, int quality,
int lz77TypesToTry, int lz77TypesToTry,
ref int cacheBits, ref int cacheBits,
MemoryAllocator memoryAllocator,
Vp8LHashChain hashChain, Vp8LHashChain hashChain,
Vp8LBackwardRefs best, Vp8LBackwardRefs best,
Vp8LBackwardRefs worst) Vp8LBackwardRefs worst)
@ -69,7 +72,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
BackwardReferencesLz77(width, height, bgra, 0, hashChain, worst); BackwardReferencesLz77(width, height, bgra, 0, hashChain, worst);
break; break;
case Vp8LLz77Type.Lz77Box: case Vp8LLz77Type.Lz77Box:
hashChainBox = new Vp8LHashChain(width * height); hashChainBox = new Vp8LHashChain(memoryAllocator, width * height);
BackwardReferencesLz77Box(width, height, bgra, 0, hashChain, hashChainBox, worst); BackwardReferencesLz77Box(width, height, bgra, 0, hashChain, hashChainBox, worst);
break; break;
} }
@ -100,7 +103,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if ((lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard || lz77TypeBest == (int)Vp8LLz77Type.Lz77Box) && quality >= 25) if ((lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard || lz77TypeBest == (int)Vp8LLz77Type.Lz77Box) && quality >= 25)
{ {
Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox; Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox;
BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst); BackwardReferencesTraceBackwards(width, height, memoryAllocator, bgra, cacheBits, hashChainTmp, best, worst);
var histo = new Vp8LHistogram(worst, cacheBits); var histo = new Vp8LHistogram(worst, cacheBits);
double bitCostTrace = histo.EstimateBits(stats, bitsEntropy); double bitCostTrace = histo.EstimateBits(stats, bitsEntropy);
if (bitCostTrace < bitCostBest) if (bitCostTrace < bitCostBest)
@ -111,6 +114,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
BackwardReferences2DLocality(width, best); BackwardReferences2DLocality(width, best);
hashChainBox?.Dispose();
return best; return best;
} }
@ -234,6 +239,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static void BackwardReferencesTraceBackwards( private static void BackwardReferencesTraceBackwards(
int xSize, int xSize,
int ySize, int ySize,
MemoryAllocator memoryAllocator,
ReadOnlySpan<uint> bgra, ReadOnlySpan<uint> bgra,
int cacheBits, int cacheBits,
Vp8LHashChain hashChain, Vp8LHashChain hashChain,
@ -241,22 +247,24 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Vp8LBackwardRefs refsDst) Vp8LBackwardRefs refsDst)
{ {
int distArraySize = xSize * ySize; int distArraySize = xSize * ySize;
ushort[] distArray = new ushort[distArraySize]; using IMemoryOwner<ushort> distArrayBuffer = memoryAllocator.Allocate<ushort>(distArraySize);
Span<ushort> distArray = distArrayBuffer.GetSpan();
BackwardReferencesHashChainDistanceOnly(xSize, ySize, bgra, cacheBits, hashChain, refsSrc, distArray); BackwardReferencesHashChainDistanceOnly(xSize, ySize, memoryAllocator, bgra, cacheBits, hashChain, refsSrc, distArrayBuffer);
int chosenPathSize = TraceBackwards(distArray, distArraySize); int chosenPathSize = TraceBackwards(distArray, distArraySize);
Span<ushort> chosenPath = distArray.AsSpan(distArraySize - chosenPathSize); Span<ushort> chosenPath = distArray.Slice(distArraySize - chosenPathSize);
BackwardReferencesHashChainFollowChosenPath(bgra, cacheBits, chosenPath, chosenPathSize, hashChain, refsDst); BackwardReferencesHashChainFollowChosenPath(bgra, cacheBits, chosenPath, chosenPathSize, hashChain, refsDst);
} }
private static void BackwardReferencesHashChainDistanceOnly( private static void BackwardReferencesHashChainDistanceOnly(
int xSize, int xSize,
int ySize, int ySize,
MemoryAllocator memoryAllocator,
ReadOnlySpan<uint> bgra, ReadOnlySpan<uint> bgra,
int cacheBits, int cacheBits,
Vp8LHashChain hashChain, Vp8LHashChain hashChain,
Vp8LBackwardRefs refs, Vp8LBackwardRefs refs,
ushort[] distArray) IMemoryOwner<ushort> distArrayBuffer)
{ {
int pixCount = xSize * ySize; int pixCount = xSize * ySize;
bool useColorCache = cacheBits > 0; bool useColorCache = cacheBits > 0;
@ -275,22 +283,24 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
costModel.Build(xSize, cacheBits, refs); costModel.Build(xSize, cacheBits, refs);
var costManager = new CostManager(distArray, pixCount, costModel); using var costManager = new CostManager(memoryAllocator, distArrayBuffer, pixCount, costModel);
Span<float> costManagerCosts = costManager.Costs.GetSpan();
Span<ushort> distArray = distArrayBuffer.GetSpan();
// We loop one pixel at a time, but store all currently best points to non-processed locations from this point. // We loop one pixel at a time, but store all currently best points to non-processed locations from this point.
distArray[0] = 0; distArray[0] = 0;
// Add first pixel as literal. // Add first pixel as literal.
AddSingleLiteralWithCostModel(bgra, colorCache, costModel, 0, useColorCache, 0.0f, costManager.Costs, distArray); AddSingleLiteralWithCostModel(bgra, colorCache, costModel, 0, useColorCache, 0.0f, costManagerCosts, distArray);
for (int i = 1; i < pixCount; i++) for (int i = 1; i < pixCount; i++)
{ {
float prevCost = costManager.Costs[i - 1]; float prevCost = costManagerCosts[i - 1];
int offset = hashChain.FindOffset(i); int offset = hashChain.FindOffset(i);
int len = hashChain.FindLength(i); int len = hashChain.FindLength(i);
// Try adding the pixel as a literal. // Try adding the pixel as a literal.
AddSingleLiteralWithCostModel(bgra, colorCache, costModel, i, useColorCache, prevCost, costManager.Costs, distArray); AddSingleLiteralWithCostModel(bgra, colorCache, costModel, i, useColorCache, prevCost, costManagerCosts, distArray);
// If we are dealing with a non-literal. // If we are dealing with a non-literal.
if (len >= 2) if (len >= 2)
@ -334,7 +344,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
costManager.UpdateCostAtIndex(j - 1, false); costManager.UpdateCostAtIndex(j - 1, false);
costManager.UpdateCostAtIndex(j, false); costManager.UpdateCostAtIndex(j, false);
costManager.PushInterval(costManager.Costs[j - 1] + offsetCost, j, lenJ); costManager.PushInterval(costManagerCosts[j - 1] + offsetCost, j, lenJ);
reach = j + lenJ - 1; reach = j + lenJ - 1;
} }
} }
@ -346,7 +356,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
private static int TraceBackwards(ushort[] distArray, int distArraySize) private static int TraceBackwards(Span<ushort> distArray, int distArraySize)
{ {
int chosenPathSize = 0; int chosenPathSize = 0;
int pathPos = distArraySize; int pathPos = distArraySize;
@ -426,8 +436,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int idx, int idx,
bool useColorCache, bool useColorCache,
float prevCost, float prevCost,
float[] cost, Span<float> cost,
ushort[] distArray) Span<ushort> distArray)
{ {
double costVal = prevCost; double costVal = prevCost;
uint color = bgra[idx]; uint color = bgra[idx];
@ -617,7 +627,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
hashChain.OffsetLength[0] = 0; Span<uint> hashChainOffsetLength = hashChain.OffsetLength.GetSpan();
hashChainOffsetLength[0] = 0;
for (i = 1; i < pixelCount; i++) for (i = 1; i < pixelCount; i++)
{ {
int ind; int ind;
@ -695,19 +706,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (bestLength <= MinLength) if (bestLength <= MinLength)
{ {
hashChain.OffsetLength[i] = 0; hashChainOffsetLength[i] = 0;
bestOffsetPrev = 0; bestOffsetPrev = 0;
bestLengthPrev = 0; bestLengthPrev = 0;
} }
else else
{ {
hashChain.OffsetLength[i] = (uint)((bestOffset << MaxLengthBits) | bestLength); hashChainOffsetLength[i] = (uint)((bestOffset << MaxLengthBits) | bestLength);
bestOffsetPrev = bestOffset; bestOffsetPrev = bestOffset;
bestLengthPrev = bestLength; bestLengthPrev = bestLength;
} }
} }
hashChain.OffsetLength[0] = 0; hashChainOffsetLength[0] = 0;
BackwardReferencesLz77(xSize, ySize, bgra, cacheBits, hashChain, refs); BackwardReferencesLz77(xSize, ySize, bgra, cacheBits, hashChain, refs);
} }

74
src/ImageSharp/Formats/Webp/Lossless/CostManager.cs

@ -1,7 +1,10 @@
// Copyright (c) Six Labors. // Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System;
using System.Buffers;
using System.Collections.Generic; using System.Collections.Generic;
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
@ -10,20 +13,29 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// It caches the different CostCacheInterval, caches the different /// It caches the different CostCacheInterval, caches the different
/// GetLengthCost(costModel, k) in costCache and the CostInterval's. /// GetLengthCost(costModel, k) in costCache and the CostInterval's.
/// </summary> /// </summary>
internal class CostManager internal sealed class CostManager : IDisposable
{ {
private CostInterval head; private CostInterval head;
public CostManager(ushort[] distArray, int pixCount, CostModel costModel) private const int FreeIntervalsStartCount = 25;
private readonly Stack<CostInterval> freeIntervals = new(FreeIntervalsStartCount);
public CostManager(MemoryAllocator memoryAllocator, IMemoryOwner<ushort> distArray, int pixCount, CostModel costModel)
{ {
int costCacheSize = pixCount > BackwardReferenceEncoder.MaxLength ? BackwardReferenceEncoder.MaxLength : pixCount; int costCacheSize = pixCount > BackwardReferenceEncoder.MaxLength ? BackwardReferenceEncoder.MaxLength : pixCount;
this.CacheIntervals = new List<CostCacheInterval>(); this.CacheIntervals = new List<CostCacheInterval>();
this.CostCache = new List<double>(); this.CostCache = new List<double>();
this.Costs = new float[pixCount]; this.Costs = memoryAllocator.Allocate<float>(pixCount);
this.DistArray = distArray; this.DistArray = distArray;
this.Count = 0; this.Count = 0;
for (int i = 0; i < FreeIntervalsStartCount; i++)
{
this.freeIntervals.Push(new CostInterval());
}
// Fill in the cost cache. // Fill in the cost cache.
this.CacheIntervalsSize++; this.CacheIntervalsSize++;
this.CostCache.Add(costModel.GetLengthCost(0)); this.CostCache.Add(costModel.GetLengthCost(0));
@ -64,10 +76,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
// Set the initial costs high for every pixel as we will keep the minimum. // Set the initial costs high for every pixel as we will keep the minimum.
for (int i = 0; i < pixCount; i++) this.Costs.GetSpan().Fill(1e38f);
{
this.Costs[i] = 1e38f;
}
} }
/// <summary> /// <summary>
@ -82,9 +91,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public int CacheIntervalsSize { get; } public int CacheIntervalsSize { get; }
public float[] Costs { get; } public IMemoryOwner<float> Costs { get; }
public ushort[] DistArray { get; } public IMemoryOwner<ushort> DistArray { get; }
public List<CostCacheInterval> CacheIntervals { get; } public List<CostCacheInterval> CacheIntervals { get; }
@ -128,6 +137,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// interval logic, just serialize it right away. This constant is empirical. // interval logic, just serialize it right away. This constant is empirical.
int skipDistance = 10; int skipDistance = 10;
Span<float> costs = this.Costs.GetSpan();
Span<ushort> distArray = this.DistArray.GetSpan();
if (len < skipDistance) if (len < skipDistance)
{ {
for (int j = position; j < position + len; j++) for (int j = position; j < position + len; j++)
@ -135,10 +146,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int k = j - position; int k = j - position;
float costTmp = (float)(distanceCost + this.CostCache[k]); float costTmp = (float)(distanceCost + this.CostCache[k]);
if (this.Costs[j] > costTmp) if (costs[j] > costTmp)
{ {
this.Costs[j] = costTmp; costs[j] = costTmp;
this.DistArray[j] = (ushort)(k + 1); distArray[j] = (ushort)(k + 1);
} }
} }
@ -201,10 +212,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.InsertInterval(interval, interval.Cost, interval.Index, end, endOriginal); this.InsertInterval(interval, interval.Cost, interval.Index, end, endOriginal);
break; break;
} }
else
{ interval.End = start;
interval.End = start;
}
} }
} }
@ -226,6 +235,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.ConnectIntervals(interval.Previous, interval.Next); this.ConnectIntervals(interval.Previous, interval.Next);
this.Count--; this.Count--;
interval.Next = null;
interval.Previous = null;
this.freeIntervals.Push(interval);
} }
private void InsertInterval(CostInterval intervalIn, float cost, int position, int start, int end) private void InsertInterval(CostInterval intervalIn, float cost, int position, int start, int end)
@ -236,13 +249,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
// TODO: should we use COST_CACHE_INTERVAL_SIZE_MAX? // TODO: should we use COST_CACHE_INTERVAL_SIZE_MAX?
var intervalNew = new CostInterval() CostInterval intervalNew;
if (this.freeIntervals.Count > 0)
{ {
Cost = cost, intervalNew = this.freeIntervals.Pop();
Start = start, intervalNew.Cost = cost;
End = end, intervalNew.Start = start;
Index = position intervalNew.End = end;
}; intervalNew.Index = position;
}
else
{
intervalNew = new CostInterval() { Cost = cost, Start = start, End = end, Index = position };
}
this.PositionOrphanInterval(intervalNew, intervalIn); this.PositionOrphanInterval(intervalNew, intervalIn);
this.Count++; this.Count++;
@ -297,12 +316,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
private void UpdateCost(int i, int position, float cost) private void UpdateCost(int i, int position, float cost)
{ {
Span<float> costs = this.Costs.GetSpan();
Span<ushort> distArray = this.DistArray.GetSpan();
int k = i - position; int k = i - position;
if (this.Costs[i] > cost) if (costs[i] > cost)
{ {
this.Costs[i] = cost; costs[i] = cost;
this.DistArray[i] = (ushort)(k + 1); distArray[i] = (ushort)(k + 1);
} }
} }
/// <inheritdoc />
public void Dispose() => this.Costs.Dispose();
} }
} }

10
src/ImageSharp/Formats/Webp/Lossless/HTreeGroup.cs

@ -13,16 +13,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// - UsePackedTable: few enough literal symbols, so all the bit codes can fit into a small look-up table PackedTable[] /// - UsePackedTable: few enough literal symbols, so all the bit codes can fit into a small look-up table PackedTable[]
/// The common literal base, if applicable, is stored in 'LiteralArb'. /// The common literal base, if applicable, is stored in 'LiteralArb'.
/// </summary> /// </summary>
internal class HTreeGroup internal struct HTreeGroup
{ {
public HTreeGroup(uint packedTableSize) public HTreeGroup(uint packedTableSize)
{ {
this.HTrees = new List<HuffmanCode[]>(WebpConstants.HuffmanCodesPerMetaCode); this.HTrees = new List<HuffmanCode[]>(WebpConstants.HuffmanCodesPerMetaCode);
this.PackedTable = new HuffmanCode[packedTableSize]; this.PackedTable = new HuffmanCode[packedTableSize];
for (int i = 0; i < packedTableSize; i++) this.IsTrivialCode = false;
{ this.IsTrivialLiteral = false;
this.PackedTable[i] = new HuffmanCode(); this.LiteralArb = 0;
} this.UsePackedTable = false;
} }
/// <summary> /// <summary>

2
src/ImageSharp/Formats/Webp/Lossless/HuffmanCode.cs

@ -9,7 +9,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// A classic way to do entropy coding where a smaller number of bits are used for more frequent codes. /// A classic way to do entropy coding where a smaller number of bits are used for more frequent codes.
/// </summary> /// </summary>
[DebuggerDisplay("BitsUsed: {BitsUsed}, Value: {Value}")] [DebuggerDisplay("BitsUsed: {BitsUsed}, Value: {Value}")]
internal class HuffmanCode internal struct HuffmanCode
{ {
/// <summary> /// <summary>
/// Gets or sets the number of bits used for this symbol. /// Gets or sets the number of bits used for this symbol.

4
src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs

@ -9,7 +9,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Represents the Huffman tree. /// Represents the Huffman tree.
/// </summary> /// </summary>
[DebuggerDisplay("TotalCount = {TotalCount}, Value = {Value}, Left = {PoolIndexLeft}, Right = {PoolIndexRight}")] [DebuggerDisplay("TotalCount = {TotalCount}, Value = {Value}, Left = {PoolIndexLeft}, Right = {PoolIndexRight}")]
internal struct HuffmanTree : IDeepCloneable internal struct HuffmanTree
{ {
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="HuffmanTree"/> struct. /// Initializes a new instance of the <see cref="HuffmanTree"/> struct.
@ -57,7 +57,5 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return t1.Value < t2.Value ? -1 : 1; return t1.Value < t2.Value ? -1 : 1;
} }
public IDeepCloneable DeepClone() => new HuffmanTree(this);
} }
} }

51
src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs

@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System; using System;
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
@ -218,8 +219,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
while (treeSize > 1) while (treeSize > 1)
{ {
// Finish when we have only one root. // Finish when we have only one root.
treePool[treePoolSize++] = (HuffmanTree)tree[treeSize - 1].DeepClone(); treePool[treePoolSize++] = tree[treeSize - 1];
treePool[treePoolSize++] = (HuffmanTree)tree[treeSize - 2].DeepClone(); treePool[treePoolSize++] = tree[treeSize - 2];
int count = treePool[treePoolSize - 1].TotalCount + treePool[treePoolSize - 2].TotalCount; int count = treePool[treePoolSize - 1].TotalCount + treePool[treePoolSize - 2].TotalCount;
treeSize -= 2; treeSize -= 2;
@ -238,7 +239,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int startIdx = endIdx + num - 1; int startIdx = endIdx + num - 1;
for (int i = startIdx; i >= endIdx; i--) for (int i = startIdx; i >= endIdx; i--)
{ {
tree[i] = (HuffmanTree)tree[i - 1].DeepClone(); tree[i] = tree[i - 1];
} }
tree[k].TotalCount = count; tree[k].TotalCount = count;
@ -307,9 +308,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static int BuildHuffmanTable(Span<HuffmanCode> table, int rootBits, int[] codeLengths, int codeLengthsSize) public static int BuildHuffmanTable(Span<HuffmanCode> table, int rootBits, int[] codeLengths, int codeLengthsSize)
{ {
Guard.MustBeGreaterThan(rootBits, 0, nameof(rootBits)); DebugGuard.MustBeGreaterThan(rootBits, 0, nameof(rootBits));
Guard.NotNull(codeLengths, nameof(codeLengths)); DebugGuard.NotNull(codeLengths, nameof(codeLengths));
Guard.MustBeGreaterThan(codeLengthsSize, 0, nameof(codeLengthsSize)); DebugGuard.MustBeGreaterThan(codeLengthsSize, 0, nameof(codeLengthsSize));
// sorted[codeLengthsSize] is a pre-allocated array for sorting symbols by code length. // sorted[codeLengthsSize] is a pre-allocated array for sorting symbols by code length.
int[] sorted = new int[codeLengthsSize]; int[] sorted = new int[codeLengthsSize];
@ -467,27 +468,27 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
break; break;
} }
else if (repetitions < 11)
if (repetitions < 11)
{ {
tokens[pos].Code = 17; tokens[pos].Code = 17;
tokens[pos].ExtraBits = (byte)(repetitions - 3); tokens[pos].ExtraBits = (byte)(repetitions - 3);
pos++; pos++;
break; break;
} }
else if (repetitions < 139)
if (repetitions < 139)
{ {
tokens[pos].Code = 18; tokens[pos].Code = 18;
tokens[pos].ExtraBits = (byte)(repetitions - 11); tokens[pos].ExtraBits = (byte)(repetitions - 11);
pos++; pos++;
break; break;
} }
else
{ tokens[pos].Code = 18;
tokens[pos].Code = 18; tokens[pos].ExtraBits = 0x7f; // 138 repeated 0s
tokens[pos].ExtraBits = 0x7f; // 138 repeated 0s pos++;
pos++; repetitions -= 138;
repetitions -= 138;
}
} }
return pos; return pos;
@ -519,20 +520,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
break; break;
} }
else if (repetitions < 7)
if (repetitions < 7)
{ {
tokens[pos].Code = 16; tokens[pos].Code = 16;
tokens[pos].ExtraBits = (byte)(repetitions - 3); tokens[pos].ExtraBits = (byte)(repetitions - 3);
pos++; pos++;
break; break;
} }
else
{ tokens[pos].Code = 16;
tokens[pos].Code = 16; tokens[pos].ExtraBits = 3;
tokens[pos].ExtraBits = 3; pos++;
pos++; repetitions -= 6;
repetitions -= 6;
}
} }
return pos; return pos;
@ -541,7 +541,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// <summary> /// <summary>
/// Get the actual bit values for a tree of bit depths. /// Get the actual bit values for a tree of bit depths.
/// </summary> /// </summary>
/// <param name="tree">The hiffman tree.</param> /// <param name="tree">The huffman tree.</param>
private static void ConvertBitDepthsToSymbols(HuffmanTreeCode tree) private static void ConvertBitDepthsToSymbols(HuffmanTreeCode tree)
{ {
// 0 bit-depth means that the symbol does not exist. // 0 bit-depth means that the symbol does not exist.
@ -628,7 +628,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
private static void ReplicateValue(Span<HuffmanCode> table, int step, int end, HuffmanCode code) private static void ReplicateValue(Span<HuffmanCode> table, int step, int end, HuffmanCode code)
{ {
Guard.IsTrue(end % step == 0, nameof(end), "end must be a multiple of step"); DebugGuard.IsTrue(end % step == 0, nameof(end), "end must be a multiple of step");
do do
{ {
@ -656,6 +656,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// <summary> /// <summary>
/// Heuristics for selecting the stride ranges to collapse. /// Heuristics for selecting the stride ranges to collapse.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static bool ValuesShouldBeCollapsedToStrideAverage(int a, int b) => Math.Abs(a - b) < 4; private static bool ValuesShouldBeCollapsedToStrideAverage(int a, int b) => Math.Abs(a - b) < 4;
} }
} }

2
src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs

@ -6,7 +6,7 @@ using System.Diagnostics;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
[DebuggerDisplay("Mode: {Mode}, Len: {Len}, BgraOrDistance: {BgraOrDistance}")] [DebuggerDisplay("Mode: {Mode}, Len: {Len}, BgraOrDistance: {BgraOrDistance}")]
internal class PixOrCopy internal sealed class PixOrCopy
{ {
public PixOrCopyMode Mode { get; set; } public PixOrCopyMode Mode { get; set; }

2
src/ImageSharp/Formats/Webp/Lossless/PixOrCopyMode.cs

@ -3,7 +3,7 @@
namespace SixLabors.ImageSharp.Formats.Webp.Lossless namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
internal enum PixOrCopyMode internal enum PixOrCopyMode : byte
{ {
Literal, Literal,

2
src/ImageSharp/Formats/Webp/Lossless/Vp8LBackwardRefs.cs

@ -7,7 +7,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
internal class Vp8LBackwardRefs internal class Vp8LBackwardRefs
{ {
public Vp8LBackwardRefs() => this.Refs = new List<PixOrCopy>(); public Vp8LBackwardRefs(int pixels) => this.Refs = new List<PixOrCopy>(pixels);
/// <summary> /// <summary>
/// Gets or sets the common block-size. /// Gets or sets the common block-size.

26
src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs

@ -124,19 +124,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.EncodedData = memoryAllocator.Allocate<uint>(pixelCount); this.EncodedData = memoryAllocator.Allocate<uint>(pixelCount);
this.Palette = memoryAllocator.Allocate<uint>(WebpConstants.MaxPaletteSize); this.Palette = memoryAllocator.Allocate<uint>(WebpConstants.MaxPaletteSize);
this.Refs = new Vp8LBackwardRefs[3]; this.Refs = new Vp8LBackwardRefs[3];
this.HashChain = new Vp8LHashChain(pixelCount); this.HashChain = new Vp8LHashChain(memoryAllocator, pixelCount);
// We round the block size up, so we're guaranteed to have at most MaxRefsBlockPerImage blocks used: // We round the block size up, so we're guaranteed to have at most MaxRefsBlockPerImage blocks used:
int refsBlockSize = ((pixelCount - 1) / MaxRefsBlockPerImage) + 1; int refsBlockSize = ((pixelCount - 1) / MaxRefsBlockPerImage) + 1;
for (int i = 0; i < this.Refs.Length; i++) for (int i = 0; i < this.Refs.Length; i++)
{ {
this.Refs[i] = new Vp8LBackwardRefs this.Refs[i] = new Vp8LBackwardRefs(pixelCount)
{ {
BlockSize = refsBlockSize < MinBlockSize ? MinBlockSize : refsBlockSize BlockSize = refsBlockSize < MinBlockSize ? MinBlockSize : refsBlockSize
}; };
} }
} }
// RFC 1951 will calm you down if you are worried about this funny sequence.
// This sequence is tuned from that, but more weighted for lower symbol count,
// and more spiking histograms.
// This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<byte> StorageOrder => new byte[] { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
// This uses C#'s compiler optimization to refer to assembly's static data directly. // This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<byte> Order => new byte[] { 1, 2, 0, 3 }; private static ReadOnlySpan<byte> Order => new byte[] { 1, 2, 0, 3 };
@ -515,7 +521,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
// Calculate backward references from BGRA image. // Calculate backward references from BGRA image.
this.HashChain.Fill(this.memoryAllocator, bgra, this.quality, width, height, lowEffort); this.HashChain.Fill(bgra, this.quality, width, height, lowEffort);
Vp8LBitWriter bitWriterBest = config.SubConfigs.Count > 1 ? this.bitWriter.Clone() : this.bitWriter; Vp8LBitWriter bitWriterBest = config.SubConfigs.Count > 1 ? this.bitWriter.Clone() : this.bitWriter;
Vp8LBitWriter bwInit = this.bitWriter; Vp8LBitWriter bwInit = this.bitWriter;
@ -529,6 +535,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.quality, this.quality,
subConfig.Lz77, subConfig.Lz77,
ref cacheBits, ref cacheBits,
this.memoryAllocator,
this.HashChain, this.HashChain,
this.Refs[0], this.Refs[0],
this.Refs[1]); this.Refs[1]);
@ -735,7 +742,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
// Calculate backward references from the image pixels. // Calculate backward references from the image pixels.
hashChain.Fill(this.memoryAllocator, bgra, quality, width, height, lowEffort); hashChain.Fill(bgra, quality, width, height, lowEffort);
Vp8LBackwardRefs refs = BackwardReferenceEncoder.GetBackwardReferences( Vp8LBackwardRefs refs = BackwardReferenceEncoder.GetBackwardReferences(
width, width,
@ -744,6 +751,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
quality, quality,
(int)Vp8LLz77Type.Lz77Standard | (int)Vp8LLz77Type.Lz77Rle, (int)Vp8LLz77Type.Lz77Standard | (int)Vp8LLz77Type.Lz77Rle,
ref cacheBits, ref cacheBits,
this.memoryAllocator,
hashChain, hashChain,
refsTmp1, refsTmp1,
refsTmp2); refsTmp2);
@ -940,16 +948,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private void StoreHuffmanTreeOfHuffmanTreeToBitMask(byte[] codeLengthBitDepth) private void StoreHuffmanTreeOfHuffmanTreeToBitMask(byte[] codeLengthBitDepth)
{ {
// RFC 1951 will calm you down if you are worried about this funny sequence.
// This sequence is tuned from that, but more weighted for lower symbol count,
// and more spiking histograms.
byte[] storageOrder = { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
// Throw away trailing zeros: // Throw away trailing zeros:
int codesToStore = WebpConstants.CodeLengthCodes; int codesToStore = WebpConstants.CodeLengthCodes;
for (; codesToStore > 4; codesToStore--) for (; codesToStore > 4; codesToStore--)
{ {
if (codeLengthBitDepth[storageOrder[codesToStore - 1]] != 0) if (codeLengthBitDepth[StorageOrder[codesToStore - 1]] != 0)
{ {
break; break;
} }
@ -958,7 +961,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.bitWriter.PutBits((uint)codesToStore - 4, 4); this.bitWriter.PutBits((uint)codesToStore - 4, 4);
for (int i = 0; i < codesToStore; i++) for (int i = 0; i < codesToStore; i++)
{ {
this.bitWriter.PutBits(codeLengthBitDepth[storageOrder[i]], 3); this.bitWriter.PutBits(codeLengthBitDepth[StorageOrder[i]], 3);
} }
} }
@ -1802,6 +1805,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.BgraScratch.Dispose(); this.BgraScratch.Dispose();
this.Palette.Dispose(); this.Palette.Dispose();
this.TransformData.Dispose(); this.TransformData.Dispose();
this.HashChain.Dispose();
} }
} }
} }

40
src/ImageSharp/Formats/Webp/Lossless/Vp8LHashChain.cs

@ -8,7 +8,7 @@ using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
internal class Vp8LHashChain internal sealed class Vp8LHashChain : IDisposable
{ {
private const uint HashMultiplierHi = 0xc6a4a793u; private const uint HashMultiplierHi = 0xc6a4a793u;
@ -28,14 +28,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
private const int WindowSize = (1 << WindowSizeBits) - 120; private const int WindowSize = (1 << WindowSizeBits) - 120;
private readonly MemoryAllocator memoryAllocator;
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="Vp8LHashChain"/> class. /// Initializes a new instance of the <see cref="Vp8LHashChain"/> class.
/// </summary> /// </summary>
/// <param name="memoryAllocator">The memory allocator.</param>
/// <param name="size">The size of the chain.</param> /// <param name="size">The size of the chain.</param>
public Vp8LHashChain(int size) public Vp8LHashChain(MemoryAllocator memoryAllocator, int size)
{ {
this.OffsetLength = new uint[size]; this.memoryAllocator = memoryAllocator;
this.OffsetLength.AsSpan().Fill(0xcdcdcdcd); this.OffsetLength = this.memoryAllocator.Allocate<uint>(size, AllocationOptions.Clean);
this.Size = size; this.Size = size;
} }
@ -45,16 +48,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// These 20 bits are the limit defined by GetWindowSizeForHashChain (through WindowSize = 1 &lt;&lt; 20). /// These 20 bits are the limit defined by GetWindowSizeForHashChain (through WindowSize = 1 &lt;&lt; 20).
/// The lower 12 bits contain the length of the match. /// The lower 12 bits contain the length of the match.
/// </summary> /// </summary>
public uint[] OffsetLength { get; } public IMemoryOwner<uint> OffsetLength { get; }
/// <summary> /// <summary>
/// Gets the size of the hash chain. /// Gets the size of the hash chain.
/// This is the maximum size of the hash_chain that can be constructed. /// This is the maximum size of the hashchain that can be constructed.
/// Typically this is the pixel count (width x height) for a given image. /// Typically this is the pixel count (width x height) for a given image.
/// </summary> /// </summary>
public int Size { get; } public int Size { get; }
public void Fill(MemoryAllocator memoryAllocator, ReadOnlySpan<uint> bgra, int quality, int xSize, int ySize, bool lowEffort) public void Fill(ReadOnlySpan<uint> bgra, int quality, int xSize, int ySize, bool lowEffort)
{ {
int size = xSize * ySize; int size = xSize * ySize;
int iterMax = GetMaxItersForQuality(quality); int iterMax = GetMaxItersForQuality(quality);
@ -63,20 +66,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (size <= 2) if (size <= 2)
{ {
this.OffsetLength[0] = 0; this.OffsetLength.GetSpan()[0] = 0;
return; return;
} }
using IMemoryOwner<int> hashToFirstIndexBuffer = memoryAllocator.Allocate<int>(HashSize); using IMemoryOwner<int> hashToFirstIndexBuffer = this.memoryAllocator.Allocate<int>(HashSize);
using IMemoryOwner<int> chainBuffer = this.memoryAllocator.Allocate<int>(size, AllocationOptions.Clean);
Span<int> hashToFirstIndex = hashToFirstIndexBuffer.GetSpan(); Span<int> hashToFirstIndex = hashToFirstIndexBuffer.GetSpan();
Span<int> chain = chainBuffer.GetSpan();
// Initialize hashToFirstIndex array to -1. // Initialize hashToFirstIndex array to -1.
hashToFirstIndex.Fill(-1); hashToFirstIndex.Fill(-1);
int[] chain = new int[size];
// Fill the chain linking pixels with the same hash. // Fill the chain linking pixels with the same hash.
bool bgraComp = bgra.Length > 1 && bgra[0] == bgra[1]; bool bgraComp = bgra.Length > 1 && bgra[0] == bgra[1];
Span<uint> tmp = stackalloc uint[2];
for (pos = 0; pos < size - 2;) for (pos = 0; pos < size - 2;)
{ {
uint hashCode; uint hashCode;
@ -85,7 +89,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
// Consecutive pixels with the same color will share the same hash. // Consecutive pixels with the same color will share the same hash.
// We therefore use a different hash: the color and its repetition length. // We therefore use a different hash: the color and its repetition length.
uint[] tmp = new uint[2]; tmp.Clear();
uint len = 1; uint len = 1;
tmp[0] = bgra[pos]; tmp[0] = bgra[pos];
@ -134,7 +138,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Find the best match interval at each pixel, defined by an offset to the // Find the best match interval at each pixel, defined by an offset to the
// pixel and a length. The right-most pixel cannot match anything to the right // pixel and a length. The right-most pixel cannot match anything to the right
// (hence a best length of 0) and the left-most pixel nothing to the left (hence an offset of 0). // (hence a best length of 0) and the left-most pixel nothing to the left (hence an offset of 0).
this.OffsetLength[0] = this.OffsetLength[size - 1] = 0; Span<uint> offsetLength = this.OffsetLength.GetSpan();
offsetLength[0] = offsetLength[size - 1] = 0;
for (int basePosition = size - 2; basePosition > 0;) for (int basePosition = size - 2; basePosition > 0;)
{ {
int maxLen = LosslessUtils.MaxFindCopyLength(size - 1 - basePosition); int maxLen = LosslessUtils.MaxFindCopyLength(size - 1 - basePosition);
@ -208,7 +213,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint maxBasePosition = (uint)basePosition; uint maxBasePosition = (uint)basePosition;
while (true) while (true)
{ {
this.OffsetLength[basePosition] = (bestDistance << BackwardReferenceEncoder.MaxLengthBits) | (uint)bestLength; offsetLength[basePosition] = (bestDistance << BackwardReferenceEncoder.MaxLengthBits) | (uint)bestLength;
--basePosition; --basePosition;
// Stop if we don't have a match or if we are out of bounds. // Stop if we don't have a match or if we are out of bounds.
@ -242,10 +247,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public int FindLength(int basePosition) => (int)(this.OffsetLength[basePosition] & ((1U << BackwardReferenceEncoder.MaxLengthBits) - 1)); public int FindLength(int basePosition) => (int)(this.OffsetLength.GetSpan()[basePosition] & ((1U << BackwardReferenceEncoder.MaxLengthBits) - 1));
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public int FindOffset(int basePosition) => (int)(this.OffsetLength[basePosition] >> BackwardReferenceEncoder.MaxLengthBits); public int FindOffset(int basePosition) => (int)(this.OffsetLength.GetSpan()[basePosition] >> BackwardReferenceEncoder.MaxLengthBits);
/// <summary> /// <summary>
/// Calculates the hash for a pixel pair. /// Calculates the hash for a pixel pair.
@ -280,5 +285,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return maxWindowSize > WindowSize ? WindowSize : maxWindowSize; return maxWindowSize > WindowSize ? WindowSize : maxWindowSize;
} }
/// <inheritdoc />
public void Dispose() => this.OffsetLength.Dispose();
} }
} }

27
src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs

@ -65,15 +65,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
FixedTableSize + 2704 FixedTableSize + 2704
}; };
private static readonly byte[] CodeLengthCodeOrder = { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
private static readonly int NumCodeLengthCodes = CodeLengthCodeOrder.Length; private static readonly int NumCodeLengthCodes = CodeLengthCodeOrder.Length;
private static readonly byte[] LiteralMap =
{
0, 1, 1, 1, 0
};
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="WebpLosslessDecoder"/> class. /// Initializes a new instance of the <see cref="WebpLosslessDecoder"/> class.
/// </summary> /// </summary>
@ -87,6 +80,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.configuration = configuration; this.configuration = configuration;
} }
// This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<byte> CodeLengthCodeOrder => new byte[] { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
// This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<byte> LiteralMap => new byte[] { 0, 1, 1, 1, 0 };
/// <summary> /// <summary>
/// Decodes the image from the stream using the bitreader. /// Decodes the image from the stream using the bitreader.
/// </summary> /// </summary>
@ -834,10 +833,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private void BuildPackedTable(HTreeGroup hTreeGroup) private void BuildPackedTable(HTreeGroup hTreeGroup)
{ {
for (uint code = 0; code < HuffmanUtils.HuffmanPackedTableSize; ++code) for (uint code = 0; code < HuffmanUtils.HuffmanPackedTableSize; code++)
{ {
uint bits = code; uint bits = code;
HuffmanCode huff = hTreeGroup.PackedTable[bits]; ref HuffmanCode huff = ref hTreeGroup.PackedTable[bits];
HuffmanCode hCode = hTreeGroup.HTrees[HuffIndex.Green][bits]; HuffmanCode hCode = hTreeGroup.HTrees[HuffIndex.Green][bits];
if (hCode.Value >= WebpConstants.NumLiteralCodes) if (hCode.Value >= WebpConstants.NumLiteralCodes)
{ {
@ -848,10 +847,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
huff.BitsUsed = 0; huff.BitsUsed = 0;
huff.Value = 0; huff.Value = 0;
bits >>= AccumulateHCode(hCode, 8, huff); bits >>= AccumulateHCode(hCode, 8, ref huff);
bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Red][bits], 16, huff); bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Red][bits], 16, ref huff);
bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Blue][bits], 0, huff); bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Blue][bits], 0, ref huff);
bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Alpha][bits], 24, huff); bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Alpha][bits], 24, ref huff);
} }
} }
} }
@ -992,7 +991,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private static int AccumulateHCode(HuffmanCode hCode, int shift, HuffmanCode huff) private static int AccumulateHCode(HuffmanCode hCode, int shift, ref HuffmanCode huff)
{ {
huff.BitsUsed += hCode.BitsUsed; huff.BitsUsed += hCode.BitsUsed;
huff.Value |= hCode.Value << shift; huff.Value |= hCode.Value << shift;

3
src/ImageSharp/Formats/Webp/WebpLookupTables.cs

@ -239,7 +239,8 @@ namespace SixLabors.ImageSharp.Formats.Webp
} }
}; };
public static readonly byte[] Norm = // This uses C#'s compiler optimization to refer to assembly's static data directly.
public static ReadOnlySpan<byte> Norm => new byte[]
{ {
// renorm_sizes[i] = 8 - log2(i) // renorm_sizes[i] = 8 - log2(i)
7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,

2
src/ImageSharp/Processing/Extensions/Normalization/HistogramEqualizationExtensions.cs

@ -16,7 +16,7 @@ namespace SixLabors.ImageSharp.Processing
/// <param name="source">The image this method extends.</param> /// <param name="source">The image this method extends.</param>
/// <returns>The <see cref="IImageProcessingContext"/> to allow chaining of operations.</returns> /// <returns>The <see cref="IImageProcessingContext"/> to allow chaining of operations.</returns>
public static IImageProcessingContext HistogramEqualization(this IImageProcessingContext source) => public static IImageProcessingContext HistogramEqualization(this IImageProcessingContext source) =>
HistogramEqualization(source, HistogramEqualizationOptions.Default); HistogramEqualization(source, new HistogramEqualizationOptions());
/// <summary> /// <summary>
/// Equalizes the histogram of an image to increase the contrast. /// Equalizes the histogram of an image to increase the contrast.

5
src/ImageSharp/Processing/Processors/Normalization/HistogramEqualizationOptions.cs

@ -8,11 +8,6 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
/// </summary> /// </summary>
public class HistogramEqualizationOptions public class HistogramEqualizationOptions
{ {
/// <summary>
/// Gets the default <see cref="HistogramEqualizationOptions"/> instance.
/// </summary>
public static HistogramEqualizationOptions Default { get; } = new HistogramEqualizationOptions();
/// <summary> /// <summary>
/// Gets or sets the histogram equalization method to use. Defaults to global histogram equalization. /// Gets or sets the histogram equalization method to use. Defaults to global histogram equalization.
/// </summary> /// </summary>

4
src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel}.cs

@ -22,7 +22,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Quantization
where TPixel : unmanaged, IPixel<TPixel> where TPixel : unmanaged, IPixel<TPixel>
{ {
private Rgba32[] rgbaPalette; private Rgba32[] rgbaPalette;
private readonly ColorDistanceCache cache;
// Do not make this readonly! Struct value would be always copied on non-readonly method calls.
private ColorDistanceCache cache;
private readonly Configuration configuration; private readonly Configuration configuration;
/// <summary> /// <summary>

0
tests/ImageSharp.Benchmarks/Codecs/DecodeBmp.cs → tests/ImageSharp.Benchmarks/Codecs/Bmp/DecodeBmp.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeBmp.cs → tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmp.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeBmpMultiple.cs → tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmpMultiple.cs

0
tests/ImageSharp.Benchmarks/Codecs/DecodeGif.cs → tests/ImageSharp.Benchmarks/Codecs/Gif/DecodeGif.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeGif.cs → tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGif.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeGifMultiple.cs → tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGifMultiple.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/CmykColorConversion.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/ColorConversionBenchmark.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/GrayscaleColorConversion.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/RgbColorConversion.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrColorConversion.cs

0
tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrForwardConverterBenchmark.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YccKColorConverter.cs

82
tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg.cs

@ -0,0 +1,82 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.IO;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg;
using SixLabors.ImageSharp.Tests;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
/// <summary>
/// Benchmarks decoding of several JPEG test images with <see cref="JpegDecoder"/>.
/// Each benchmark has its own targeted global setup which preloads the image file into memory,
/// so the measured method only rewinds the in-memory stream and decodes it.
/// </summary>
public class DecodeJpeg
{
    private JpegDecoder decoder;
    private MemoryStream preloadedImageStream;

    /// <summary>Preloads the baseline 4:4:4 interleaved image.</summary>
    [GlobalSetup(Target = nameof(JpegBaselineInterleaved444))]
    public void SetupBaselineInterleaved444()
    {
        this.GenericSetup(TestImages.Jpeg.Baseline.Winter444_Interleaved);
    }

    /// <summary>Preloads the baseline 4:2:0 interleaved image.</summary>
    [GlobalSetup(Target = nameof(JpegBaselineInterleaved420))]
    public void SetupBaselineInterleaved420()
    {
        this.GenericSetup(TestImages.Jpeg.Baseline.Hiyamugi);
    }

    /// <summary>Preloads the baseline single component (4:0:0) image.</summary>
    [GlobalSetup(Target = nameof(JpegBaseline400))]
    public void SetupBaselineSingleComponent()
    {
        this.GenericSetup(TestImages.Jpeg.Baseline.Jpeg400);
    }

    /// <summary>Preloads the progressive 4:2:0 non-interleaved image.</summary>
    [GlobalSetup(Target = nameof(JpegProgressiveNonInterleaved420))]
    public void SetupProgressiveNoninterleaved420()
    {
        this.GenericSetup(TestImages.Jpeg.Progressive.Winter420_NonInterleaved);
    }

    /// <summary>Disposes the preloaded stream and clears the reference to it.</summary>
    [GlobalCleanup]
    public void Cleanup()
    {
        this.preloadedImageStream.Dispose();
        this.preloadedImageStream = null;
    }

    [Benchmark(Description = "Baseline 4:4:4 Interleaved")]
    public void JpegBaselineInterleaved444()
    {
        this.GenericBenchmark();
    }

    [Benchmark(Description = "Baseline 4:2:0 Interleaved")]
    public void JpegBaselineInterleaved420()
    {
        this.GenericBenchmark();
    }

    [Benchmark(Description = "Baseline 4:0:0 (grayscale)")]
    public void JpegBaseline400()
    {
        this.GenericBenchmark();
    }

    [Benchmark(Description = "Progressive 4:2:0 Non-Interleaved")]
    public void JpegProgressiveNonInterleaved420()
    {
        this.GenericBenchmark();
    }

    /// <summary>
    /// Creates the decoder and reads the whole image file into a <see cref="MemoryStream"/>.
    /// </summary>
    /// <param name="imageSubpath">Image path relative to the test input images directory.</param>
    private void GenericSetup(string imageSubpath)
    {
        this.decoder = new JpegDecoder();

        string fullPath = Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, imageSubpath);
        byte[] fileContents = File.ReadAllBytes(fullPath);
        this.preloadedImageStream = new MemoryStream(fileContents);
    }

    /// <summary>
    /// Shared benchmark body: rewinds the preloaded stream, decodes it and disposes the decoded image.
    /// </summary>
    private void GenericBenchmark()
    {
        // The same stream instance is reused by every invocation, so it must be rewound first.
        this.preloadedImageStream.Seek(0, SeekOrigin.Begin);
        using Image decoded = this.decoder.Decode(Configuration.Default, this.preloadedImageStream);
    }
}
}
/*
BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042.1348 (20H2/October2020Update)
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET SDK=6.0.100-preview.3.21202.5
[Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
DefaultJob : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
| Method | Mean | Error | StdDev |
|------------------------------------ |----------:|----------:|----------:|
| 'Baseline 4:4:4 Interleaved' | 11.127 ms | 0.0659 ms | 0.0550 ms |
| 'Baseline 4:2:0 Interleaved' | 8.458 ms | 0.0289 ms | 0.0256 ms |
| 'Baseline 4:0:0 (grayscale)' | 1.550 ms | 0.0050 ms | 0.0044 ms |
| 'Progressive 4:2:0 Non-Interleaved' | 13.220 ms | 0.0449 ms | 0.0398 ms |
*/

0
tests/ImageSharp.Benchmarks/Codecs/DecodeFilteredPng.cs → tests/ImageSharp.Benchmarks/Codecs/Png/DecodeFilteredPng.cs

0
tests/ImageSharp.Benchmarks/Codecs/DecodePng.cs → tests/ImageSharp.Benchmarks/Codecs/Png/DecodePng.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs → tests/ImageSharp.Benchmarks/Codecs/Png/EncodeIndexedPng.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodePng.cs → tests/ImageSharp.Benchmarks/Codecs/Png/EncodePng.cs

0
tests/ImageSharp.Benchmarks/Codecs/DecodeTga.cs → tests/ImageSharp.Benchmarks/Codecs/Tga/DecodeTga.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeTga.cs → tests/ImageSharp.Benchmarks/Codecs/Tga/EncodeTga.cs

0
tests/ImageSharp.Benchmarks/Codecs/DecodeTiff.cs → tests/ImageSharp.Benchmarks/Codecs/Tiff/DecodeTiff.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeTiff.cs → tests/ImageSharp.Benchmarks/Codecs/Tiff/EncodeTiff.cs

0
tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs → tests/ImageSharp.Benchmarks/Codecs/Webp/DecodeWebp.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs → tests/ImageSharp.Benchmarks/Codecs/Webp/EncodeWebp.cs

5
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -183,9 +183,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Assert.Equal(expected, actual); Assert.Equal(expected, actual);
} }
// This method has only 2 implementations:
// 1. AVX
// 2. Scalar
FeatureTestRunner.RunWithHwIntrinsicsFeature( FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest, RunTest,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic); HwIntrinsics.AllowAll | HwIntrinsics.DisableHWIntrinsic);
} }
private static float[] Create8x8ColorCropTestData() private static float[] Create8x8ColorCropTestData()

26
tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs

@ -276,5 +276,31 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
seed, seed,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2); HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
} }
[Fact]
public void TransposeInplace()
{
    // Compares Block8x8.TransposeInplace against the reference transpose on identical input data.
    static void RunTest()
    {
        // reference
        short[] expected = Create8x8ShortData();
        ReferenceImplementations.Transpose8x8(expected);

        // testee
        Block8x8 block = default;
        block.LoadFrom(Create8x8ShortData());
        block.TransposeInplace();

        short[] actual = new short[64];
        block.CopyTo(actual);

        Assert.Equal(expected, actual);
    }

    // This method has only 1 implementation:
    // 1. Scalar
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, HwIntrinsics.DisableHWIntrinsic);
}
} }
} }

209
tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs

@ -2,9 +2,6 @@
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System; using System;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics.X86;
#endif
using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Formats.Jpeg.Components;
using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils;
using SixLabors.ImageSharp.Tests.TestUtilities; using SixLabors.ImageSharp.Tests.TestUtilities;
@ -17,6 +14,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Trait("Format", "Jpg")] [Trait("Format", "Jpg")]
public static class DCTTests public static class DCTTests
{ {
private const int MaxAllowedValue = short.MaxValue;
private const int MinAllowedValue = short.MinValue;
/// <summary>
/// Creates a block whose elements are all set to the same value.
/// </summary>
/// <param name="value">The value written to every element.</param>
/// <returns>The filled <see cref="Block8x8F"/>.</returns>
internal static Block8x8F CreateBlockFromScalar(float value)
{
    var block = default(Block8x8F);

    int index = 0;
    while (index < Block8x8F.Size)
    {
        block[index] = value;
        index++;
    }

    return block;
}
public class FastFloatingPoint : JpegFixture public class FastFloatingPoint : JpegFixture
{ {
public FastFloatingPoint(ITestOutputHelper output) public FastFloatingPoint(ITestOutputHelper output)
@ -24,130 +35,75 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{ {
} }
// Reference tests
[Theory] [Theory]
[InlineData(1)] [InlineData(1)]
[InlineData(2)] [InlineData(2)]
[InlineData(3)] [InlineData(3)]
public void LLM_TransformIDCT_CompareToNonOptimized(int seed) public void LLM_TransformIDCT_CompareToNonOptimized(int seed)
{ {
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed); float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
var srcBlock = Block8x8F.Load(sourceArray); var srcBlock = Block8x8F.Load(sourceArray);
// reference
Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref srcBlock); Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref srcBlock);
var temp = default(Block8x8F); // testee
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp); // Part of the IDCT calculations is fused into the quantization step
// We must multiply input block with adjusted no-quantization matrix
this.CompareBlocks(expected, srcBlock, 1f); // before applying IDCT
} // Dequantization using unit matrix - no values are upscaled
Block8x8F dequantMatrix = CreateBlockFromScalar(1);
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public void LLM_TransformIDCT_CompareToAccurate(int seed)
{
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
var srcBlock = Block8x8F.Load(sourceArray); // This step is needed to apply adjusting multipliers to the input block
FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock); // IDCT implementation transforms blocks after transposition
srcBlock.TransposeInplace();
srcBlock.MultiplyInPlace(ref dequantMatrix);
var temp = default(Block8x8F); // IDCT calculation
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp); FastFloatingPointDCT.TransformIDCT(ref srcBlock);
this.CompareBlocks(expected, srcBlock, 1f); this.CompareBlocks(expected, srcBlock, 1f);
} }
// Inverse transform
[Theory]
[InlineData(1)]
[InlineData(2)]
public void IDCT8x4_LeftPart(int seed)
{
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
var destBlock = default(Block8x8F);
var expectedDest = new float[64];
// reference
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);
// testee
FastFloatingPointDCT.IDCT8x4_LeftPart(ref srcBlock, ref destBlock);
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
[Theory] [Theory]
[InlineData(1)] [InlineData(1)]
[InlineData(2)] [InlineData(2)]
public void IDCT8x4_RightPart(int seed) [InlineData(3)]
public void LLM_TransformIDCT_CompareToAccurate(int seed)
{ {
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed); float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
var destBlock = default(Block8x8F); var srcBlock = Block8x8F.Load(sourceArray);
var expectedDest = new float[64];
// reference // reference
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4)); Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock);
// testee // testee
FastFloatingPointDCT.IDCT8x4_RightPart(ref srcBlock, ref destBlock); // Part of the IDCT calculations is fused into the quantization step
// We must multiply input block with adjusted no-quantization matrix
var actualDest = new float[64]; // before applying IDCT
destBlock.ScaledCopyTo(actualDest); // Dequantization using unit matrix - no values are upscaled
Block8x8F dequantMatrix = CreateBlockFromScalar(1);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public void IDCT8x8_Avx(int seed)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (!Avx.IsSupported)
{
this.Output.WriteLine("No AVX present, skipping test!");
return;
}
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
Block8x8F srcBlock = default;
srcBlock.LoadFrom(src);
Block8x8F destBlock = default; // This step is needed to apply adjusting multipliers to the input block
FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
float[] expectedDest = new float[64]; // IDCT implementation transforms blocks after transposition
srcBlock.TransposeInplace();
srcBlock.MultiplyInPlace(ref dequantMatrix);
// reference, left part // IDCT calculation
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest); FastFloatingPointDCT.TransformIDCT(ref srcBlock);
// reference, right part this.CompareBlocks(expected, srcBlock, 1f);
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
// testee, whole 8x8
FastFloatingPointDCT.IDCT8x8_Avx(ref srcBlock, ref destBlock);
float[] actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
#endif
} }
// Inverse transform
// This test covers entire IDCT conversion chain
// This test checks all hardware implementations
[Theory] [Theory]
[InlineData(1)] [InlineData(1)]
[InlineData(2)] [InlineData(2)]
@ -157,41 +113,53 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{ {
int seed = FeatureTestRunner.Deserialize<int>(serialized); int seed = FeatureTestRunner.Deserialize<int>(serialized);
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed); Span<float> src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
var srcBlock = default(Block8x8F); var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src); srcBlock.LoadFrom(src);
var expectedDest = new float[64]; float[] expectedDest = new float[64];
var temp1 = new float[64]; float[] temp = new float[64];
var temp2 = default(Block8x8F);
// reference // reference
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1); ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp);
// testee // testee
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp2); // Part of the IDCT calculations is fused into the quantization step
// We must multiply input block with adjusted no-quantization matrix
// before applying IDCT
Block8x8F dequantMatrix = CreateBlockFromScalar(1);
// Dequantization using unit matrix - no values are upscaled
// as quant matrix is all 1's
// This step is needed to apply adjusting multipliers to the input block
FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
srcBlock.MultiplyInPlace(ref dequantMatrix);
// IDCT implementation transforms blocks after transposition
srcBlock.TransposeInplace();
var actualDest = new float[64]; // IDCT calculation
srcBlock.ScaledCopyTo(actualDest); FastFloatingPointDCT.TransformIDCT(ref srcBlock);
float[] actualDest = srcBlock.ToArray();
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
} }
// 3 paths: // 4 paths:
// 1. AllowAll - call avx/fma implementation // 1. AllowAll - call avx/fma implementation
// 2. DisableFMA - call avx implementation without fma acceleration // 2. DisableFMA - call avx without fma implementation
// 3. DisableAvx - call fallback code of Vector4 implementation // 3. DisableAvx - call sse Vector4 implementation
// // 4. DisableHWIntrinsic - call scalar fallback implementation
// DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result
FeatureTestRunner.RunWithHwIntrinsicsFeature( FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest, RunTest,
seed, seed,
HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX); HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic);
} }
// Forward transform // Forward transform
// This test covers entire FDCT conversions chain // This test covers entire FDCT conversion chain
// This test checks all implementations: intrinsic and scalar fallback // This test checks all hardware implementations
[Theory] [Theory]
[InlineData(1)] [InlineData(1)]
[InlineData(2)] [InlineData(2)]
@ -201,7 +169,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{ {
int seed = FeatureTestRunner.Deserialize<int>(serialized); int seed = FeatureTestRunner.Deserialize<int>(serialized);
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed); Span<float> src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
var block = default(Block8x8F); var block = default(Block8x8F);
block.LoadFrom(src); block.LoadFrom(src);
@ -212,23 +180,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true); ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true);
// testee // testee
// Part of the FDCT calculations is fused into the quantization step
// We must multiply transformed block with reciprocal values from FastFloatingPointDCT.ANN_DCT_reciprocalAdjustmen
FastFloatingPointDCT.TransformFDCT(ref block); FastFloatingPointDCT.TransformFDCT(ref block);
for (int i = 0; i < 64; i++)
{ // Part of the FDCT calculations is fused into the quantization step
block[i] = block[i] * FastFloatingPointDCT.DctReciprocalAdjustmentCoefficients[i]; // We must multiply input block with adjusted no-quantization matrix
} // after applying FDCT
Block8x8F quantMatrix = CreateBlockFromScalar(1);
FastFloatingPointDCT.AdjustToFDCT(ref quantMatrix);
block.MultiplyInPlace(ref quantMatrix);
float[] actualDest = block.ToArray(); float[] actualDest = block.ToArray();
Assert.Equal(expectedDest, actualDest, new ApproximateFloatComparer(1f)); Assert.Equal(expectedDest, actualDest, new ApproximateFloatComparer(1f));
} }
// 3 paths: // 4 paths:
// 1. AllowAll - call avx/fma implementation // 1. AllowAll - call avx/fma implementation
// 2. DisableFMA - call avx implementation without fma acceleration // 2. DisableFMA - call avx without fma implementation
// 3. DisableAvx - call sse implementation // 3. DisableAvx - call sse Vector4 implementation
// 4. DisableHWIntrinsic - call scalar fallback implementation // 4. DisableHWIntrinsic - call scalar fallback implementation
FeatureTestRunner.RunWithHwIntrinsicsFeature( FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest, RunTest,

2
tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Images.cs

@ -20,6 +20,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
TestImages.Jpeg.Baseline.Jpeg420Small, TestImages.Jpeg.Baseline.Jpeg420Small,
TestImages.Jpeg.Issues.Fuzz.AccessViolationException922, TestImages.Jpeg.Issues.Fuzz.AccessViolationException922,
TestImages.Jpeg.Baseline.Jpeg444, TestImages.Jpeg.Baseline.Jpeg444,
TestImages.Jpeg.Baseline.Jpeg422,
TestImages.Jpeg.Baseline.Bad.BadEOF, TestImages.Jpeg.Baseline.Bad.BadEOF,
TestImages.Jpeg.Baseline.MultiScanBaselineCMYK, TestImages.Jpeg.Baseline.MultiScanBaselineCMYK,
TestImages.Jpeg.Baseline.YcckSubsample1222, TestImages.Jpeg.Baseline.YcckSubsample1222,
@ -100,6 +101,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[TestImages.Jpeg.Baseline.Bad.BadEOF] = 0.38f / 100, [TestImages.Jpeg.Baseline.Bad.BadEOF] = 0.38f / 100,
[TestImages.Jpeg.Baseline.Bad.BadRST] = 0.0589f / 100, [TestImages.Jpeg.Baseline.Bad.BadRST] = 0.0589f / 100,
[TestImages.Jpeg.Baseline.Jpeg422] = 0.0013f / 100,
[TestImages.Jpeg.Baseline.Testorig420] = 0.38f / 100, [TestImages.Jpeg.Baseline.Testorig420] = 0.38f / 100,
[TestImages.Jpeg.Baseline.Jpeg420Small] = 0.287f / 100, [TestImages.Jpeg.Baseline.Jpeg420Small] = 0.287f / 100,
[TestImages.Jpeg.Baseline.Turtle420] = 1.0f / 100, [TestImages.Jpeg.Baseline.Turtle420] = 1.0f / 100,

2
tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Metadata.cs

@ -56,7 +56,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{ TestImages.Jpeg.Progressive.Fb, 75 }, { TestImages.Jpeg.Progressive.Fb, 75 },
{ TestImages.Jpeg.Issues.IncorrectQuality845, 98 }, { TestImages.Jpeg.Issues.IncorrectQuality845, 98 },
{ TestImages.Jpeg.Baseline.ForestBridgeDifferentComponentsQuality, 89 }, { TestImages.Jpeg.Baseline.ForestBridgeDifferentComponentsQuality, 89 },
{ TestImages.Jpeg.Progressive.Winter, 80 } { TestImages.Jpeg.Progressive.Winter420_NonInterleaved, 80 }
}; };
[Theory] [Theory]

2
tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs

@ -172,7 +172,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
bool failed = false; bool failed = false;
for (int i = 0; i < 64; i++) for (int i = 0; i < Block8x8F.Size; i++)
{ {
float expected = a[i]; float expected = a[i];
float actual = b[i]; float actual = b[i];

15
tests/ImageSharp.Tests/Formats/Jpg/Utils/LibJpegTools.ComponentData.cs

@ -48,6 +48,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
public short MaxVal { get; private set; } = short.MinValue; public short MaxVal { get; private set; } = short.MinValue;
/// <summary>
/// Transposes the given block and forwards its values to the array-based overload.
/// </summary>
/// <param name="block">The block to transpose and store.</param>
/// <param name="y">Passed through unchanged to the array-based overload.</param>
/// <param name="x">Passed through unchanged to the array-based overload.</param>
internal void MakeBlock(Block8x8 block, int y, int x)
{
    // NOTE(review): block is taken without ref/in, so presumably only this local copy is transposed - confirm.
    block.TransposeInplace();

    short[] transposed = block.ToArray();
    this.MakeBlock(transposed, y, x);
}
internal void MakeBlock(short[] data, int y, int x) internal void MakeBlock(short[] data, int y, int x)
{ {
this.MinVal = Math.Min(this.MinVal, data.Min()); this.MinVal = Math.Min(this.MinVal, data.Min());
@ -66,11 +72,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
Span<Block8x8> blockRow = data.GetRowSpan(y - startIndex); Span<Block8x8> blockRow = data.GetRowSpan(y - startIndex);
for (int x = 0; x < this.WidthInBlocks; x++) for (int x = 0; x < this.WidthInBlocks; x++)
{ {
short[] block = blockRow[x].ToArray(); this.MakeBlock(blockRow[x], y, x);
// x coordinate stays the same - we load entire stride
// y coordinate is tricky as we load single stride to full buffer - offset is needed
this.MakeBlock(block, y, x);
} }
} }
} }
@ -83,8 +85,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
Span<Block8x8> blockRow = data.GetRowSpan(y); Span<Block8x8> blockRow = data.GetRowSpan(y);
for (int x = 0; x < this.WidthInBlocks; x++) for (int x = 0; x < this.WidthInBlocks; x++)
{ {
short[] block = blockRow[x].ToArray(); this.MakeBlock(blockRow[x], y, x);
this.MakeBlock(block, y, x);
} }
} }
} }

17
tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs

@ -40,6 +40,23 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
} }
} }
/// <summary>
/// Transposes, in place, an 8x8 block stored linearly (row after row) in a <see cref="Span{T}"/>.
/// </summary>
/// <param name="data">The block values; elements 0 to 63 are accessed.</param>
internal static void Transpose8x8(Span<short> data)
{
    // Walk every cell below the main diagonal and exchange it with its mirror cell above the diagonal.
    for (int row = 1; row < 8; row++)
    {
        for (int col = 0; col < row; col++)
        {
            int below = (row * 8) + col;
            int above = (col * 8) + row;
            (data[below], data[above]) = (data[above], data[below]);
        }
    }
}
/// <summary> /// <summary>
/// Transpose 8x8 block stored linearly in a <see cref="Span{T}"/> /// Transpose 8x8 block stored linearly in a <see cref="Span{T}"/>
/// </summary> /// </summary>

14
tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs

@ -1,6 +1,7 @@
// Copyright (c) Six Labors. // Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Formats.Jpeg.Components;
using Xunit; using Xunit;
@ -9,8 +10,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Trait("Format", "Jpg")] [Trait("Format", "Jpg")]
public class ZigZagTests public class ZigZagTests
{ {
[Fact] private static void CanHandleAllPossibleCoefficients(ReadOnlySpan<byte> order)
public void ZigZagCanHandleAllPossibleCoefficients()
{ {
// Mimic the behaviour of the huffman scan decoder using all possible byte values // Mimic the behaviour of the huffman scan decoder using all possible byte values
short[] block = new short[64]; short[] block = new short[64];
@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
if (s != 0) if (s != 0)
{ {
i += r; i += r;
block[ZigZag.ZigZagOrder[i++]] = (short)s; block[order[i++]] = (short)s;
} }
else else
{ {
@ -40,5 +40,13 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
} }
} }
} }
// Runs the shared coefficient check using the ZigZag.ZigZagOrder table.
[Fact]
public static void ZigZagCanHandleAllPossibleCoefficients()
{
    CanHandleAllPossibleCoefficients(ZigZag.ZigZagOrder);
}
// Runs the shared coefficient check using the ZigZag.TransposingOrder table.
[Fact]
public static void TrasposingZigZagCanHandleAllPossibleCoefficients()
{
    CanHandleAllPossibleCoefficients(ZigZag.TransposingOrder);
}
} }
} }

2
tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs

@ -5,7 +5,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossless;
using SixLabors.ImageSharp.Tests.TestUtilities; using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit; using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP namespace SixLabors.ImageSharp.Tests.Formats.Webp
{ {
[Trait("Format", "Webp")] [Trait("Format", "Webp")]
public class ColorSpaceTransformUtilsTests public class ColorSpaceTransformUtilsTests

4
tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs

@ -6,7 +6,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Tests.TestUtilities; using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit; using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP namespace SixLabors.ImageSharp.Tests.Formats.Webp
{ {
[Trait("Format", "Webp")] [Trait("Format", "Webp")]
public class LossyUtilsTests public class LossyUtilsTests
@ -38,7 +38,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP
int actual = LossyUtils.Vp8_Sse4X4(a, b); int actual = LossyUtils.Vp8_Sse4X4(a, b);
Assert.Equal(expected, actual); Assert.Equal(expected, actual);
} }
private static void RunMean16x4Test() private static void RunMean16x4Test()
{ {

2
tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs

@ -6,7 +6,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Tests.TestUtilities; using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit; using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP namespace SixLabors.ImageSharp.Tests.Formats.Webp
{ {
[Trait("Format", "Webp")] [Trait("Format", "Webp")]
public class QuantEncTests public class QuantEncTests

2
tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs

@ -6,7 +6,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Tests.TestUtilities; using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit; using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP namespace SixLabors.ImageSharp.Tests.Formats.Webp
{ {
[Trait("Format", "Webp")] [Trait("Format", "Webp")]
public class Vp8EncodingTests public class Vp8EncodingTests

3
tests/ImageSharp.Tests/TestImages.cs

@ -163,7 +163,7 @@ namespace SixLabors.ImageSharp.Tests
public const string Fb = "Jpg/progressive/fb.jpg"; public const string Fb = "Jpg/progressive/fb.jpg";
public const string Progress = "Jpg/progressive/progress.jpg"; public const string Progress = "Jpg/progressive/progress.jpg";
public const string Festzug = "Jpg/progressive/Festzug.jpg"; public const string Festzug = "Jpg/progressive/Festzug.jpg";
public const string Winter = "Jpg/progressive/winter.jpg"; public const string Winter420_NonInterleaved = "Jpg/progressive/winter420_noninterleaved.jpg";
public static class Bad public static class Bad
{ {
@ -213,6 +213,7 @@ namespace SixLabors.ImageSharp.Tests
public const string ArithmeticCoding = "Jpg/baseline/arithmetic_coding.jpg"; public const string ArithmeticCoding = "Jpg/baseline/arithmetic_coding.jpg";
public const string ArithmeticCodingProgressive = "Jpg/progressive/arithmetic_progressive.jpg"; public const string ArithmeticCodingProgressive = "Jpg/progressive/arithmetic_progressive.jpg";
public const string Lossless = "Jpg/baseline/lossless.jpg"; public const string Lossless = "Jpg/baseline/lossless.jpg";
public const string Winter444_Interleaved = "Jpg/baseline/winter444_interleaved.jpg";
public static readonly string[] All = public static readonly string[] All =
{ {

3
tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg422.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:733cc46271c4402974db2536a55e6ecae3110856df73031ca48dad03745d852d
size 35375

3
tests/Images/Input/Jpg/baseline/winter444_interleaved.jpg

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:73b1deb4e2fb8027f6bb4fb293e5b2615c80b3ac0a7f99fd90118fd340a9fd12
size 283330

0
tests/Images/Input/Jpg/progressive/winter.jpg → tests/Images/Input/Jpg/progressive/winter420_noninterleaved.jpg

Loading…
Cancel
Save