Browse Source

Merge remote-tracking branch 'origin/master' into af/UniformUnmanagedMemoryPoolMemoryAllocator-02

pull/1730/head
Anton Firszov 5 years ago
parent
commit
2b209f0a42
  1. 2
      shared-infrastructure
  2. 58
      src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs
  3. 11
      src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs
  4. 8
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
  5. 19
      src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs
  6. 28
      src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs
  7. 237
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs
  8. 532
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
  9. 29
      src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs
  10. 3
      src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs
  11. 4
      src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
  12. 4
      src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs
  13. 12
      src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs
  14. 2
      src/ImageSharp/Formats/Webp/EntropyIx.cs
  15. 2
      src/ImageSharp/Formats/Webp/HistoIx.cs
  16. 49
      src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
  17. 268
      src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs
  18. 74
      src/ImageSharp/Formats/Webp/Lossless/CostManager.cs
  19. 2
      src/ImageSharp/Formats/Webp/Lossless/CostModel.cs
  20. 10
      src/ImageSharp/Formats/Webp/Lossless/HTreeGroup.cs
  21. 2
      src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
  22. 2
      src/ImageSharp/Formats/Webp/Lossless/HuffmanCode.cs
  23. 4
      src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
  24. 53
      src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
  25. 341
      src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
  26. 2
      src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
  27. 2
      src/ImageSharp/Formats/Webp/Lossless/PixOrCopyMode.cs
  28. 185
      src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
  29. 2
      src/ImageSharp/Formats/Webp/Lossless/Vp8LBackwardRefs.cs
  30. 26
      src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
  31. 40
      src/ImageSharp/Formats/Webp/Lossless/Vp8LHashChain.cs
  32. 10
      src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs
  33. 27
      src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs
  34. 61
      src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
  35. 6
      src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs
  36. 8
      src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs
  37. 2
      src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs
  38. 323
      src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
  39. 3
      src/ImageSharp/Formats/Webp/WebpLookupTables.cs
  40. 2
      src/ImageSharp/Processing/Extensions/Normalization/HistogramEqualizationExtensions.cs
  41. 2
      src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs
  42. 5
      src/ImageSharp/Processing/Processors/Normalization/HistogramEqualizationOptions.cs
  43. 4
      src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel}.cs
  44. 0
      tests/ImageSharp.Benchmarks/Codecs/Bmp/DecodeBmp.cs
  45. 0
      tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmp.cs
  46. 0
      tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmpMultiple.cs
  47. 0
      tests/ImageSharp.Benchmarks/Codecs/Gif/DecodeGif.cs
  48. 0
      tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGif.cs
  49. 0
      tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGifMultiple.cs
  50. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/CmykColorConversion.cs
  51. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/ColorConversionBenchmark.cs
  52. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/GrayscaleColorConversion.cs
  53. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/RgbColorConversion.cs
  54. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrColorConversion.cs
  55. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrForwardConverterBenchmark.cs
  56. 0
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YccKColorConverter.cs
  57. 82
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg.cs
  58. 0
      tests/ImageSharp.Benchmarks/Codecs/Png/DecodeFilteredPng.cs
  59. 0
      tests/ImageSharp.Benchmarks/Codecs/Png/DecodePng.cs
  60. 0
      tests/ImageSharp.Benchmarks/Codecs/Png/EncodeIndexedPng.cs
  61. 0
      tests/ImageSharp.Benchmarks/Codecs/Png/EncodePng.cs
  62. 0
      tests/ImageSharp.Benchmarks/Codecs/Tga/DecodeTga.cs
  63. 0
      tests/ImageSharp.Benchmarks/Codecs/Tga/EncodeTga.cs
  64. 0
      tests/ImageSharp.Benchmarks/Codecs/Tiff/DecodeTiff.cs
  65. 0
      tests/ImageSharp.Benchmarks/Codecs/Tiff/EncodeTiff.cs
  66. 0
      tests/ImageSharp.Benchmarks/Codecs/Webp/DecodeWebp.cs
  67. 0
      tests/ImageSharp.Benchmarks/Codecs/Webp/EncodeWebp.cs
  68. 7
      tests/ImageSharp.Benchmarks/benchmark.sh
  69. 5
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
  70. 26
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs
  71. 209
      tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
  72. 2
      tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Images.cs
  73. 2
      tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Metadata.cs
  74. 2
      tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs
  75. 15
      tests/ImageSharp.Tests/Formats/Jpg/Utils/LibJpegTools.ComponentData.cs
  76. 17
      tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs
  77. 14
      tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs
  78. 92
      tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs
  79. 6
      tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
  80. 4
      tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs
  81. 6
      tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs
  82. 2
      tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs
  83. 98
      tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs
  84. 9
      tests/ImageSharp.Tests/Processing/Processors/Convolution/Basic1ParameterConvolutionTests.cs
  85. 3
      tests/ImageSharp.Tests/TestImages.cs
  86. 3
      tests/Images/External/ReferenceOutput/Convolution/BoxBlurTest/InBox_Rgba32_blur_3.png
  87. 3
      tests/Images/External/ReferenceOutput/Convolution/BoxBlurTest/InBox_Rgba32_blur_5.png
  88. 3
      tests/Images/External/ReferenceOutput/Convolution/BoxBlurTest/OnFullImage_Rgba32_blur_3.png
  89. 3
      tests/Images/External/ReferenceOutput/Convolution/BoxBlurTest/OnFullImage_Rgba32_blur_5.png
  90. 3
      tests/Images/External/ReferenceOutput/Convolution/GaussianBlurTest/InBox_Rgba32_blur_3.png
  91. 3
      tests/Images/External/ReferenceOutput/Convolution/GaussianBlurTest/InBox_Rgba32_blur_5.png
  92. 3
      tests/Images/External/ReferenceOutput/Convolution/GaussianBlurTest/OnFullImage_Rgba32_blur_3.png
  93. 3
      tests/Images/External/ReferenceOutput/Convolution/GaussianBlurTest/OnFullImage_Rgba32_blur_5.png
  94. 3
      tests/Images/External/ReferenceOutput/Convolution/GaussianSharpenTest/InBox_Rgba32_blur_3.png
  95. 3
      tests/Images/External/ReferenceOutput/Convolution/GaussianSharpenTest/InBox_Rgba32_blur_5.png
  96. 3
      tests/Images/External/ReferenceOutput/Convolution/GaussianSharpenTest/OnFullImage_Rgba32_blur_3.png
  97. 3
      tests/Images/External/ReferenceOutput/Convolution/GaussianSharpenTest/OnFullImage_Rgba32_blur_5.png
  98. 3
      tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg422.png
  99. 3
      tests/Images/Input/Jpg/baseline/winter444_interleaved.jpg
  100. 0
      tests/Images/Input/Jpg/progressive/winter420_noninterleaved.jpg

2
shared-infrastructure

@ -1 +1 @@
Subproject commit 33cb12ca77f919b44de56f344d2627cc2a108c3a
Subproject commit a042aba176cdb840d800c6ed4cfe41a54fb7b1e3

58
src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs

@ -337,6 +337,64 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
}
/// <summary>
/// Transpose the block inplace.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInplace()
{
ref short elemRef = ref Unsafe.As<Block8x8, short>(ref this);
// row #0
Swap(ref Unsafe.Add(ref elemRef, 1), ref Unsafe.Add(ref elemRef, 8));
Swap(ref Unsafe.Add(ref elemRef, 2), ref Unsafe.Add(ref elemRef, 16));
Swap(ref Unsafe.Add(ref elemRef, 3), ref Unsafe.Add(ref elemRef, 24));
Swap(ref Unsafe.Add(ref elemRef, 4), ref Unsafe.Add(ref elemRef, 32));
Swap(ref Unsafe.Add(ref elemRef, 5), ref Unsafe.Add(ref elemRef, 40));
Swap(ref Unsafe.Add(ref elemRef, 6), ref Unsafe.Add(ref elemRef, 48));
Swap(ref Unsafe.Add(ref elemRef, 7), ref Unsafe.Add(ref elemRef, 56));
// row #1
Swap(ref Unsafe.Add(ref elemRef, 10), ref Unsafe.Add(ref elemRef, 17));
Swap(ref Unsafe.Add(ref elemRef, 11), ref Unsafe.Add(ref elemRef, 25));
Swap(ref Unsafe.Add(ref elemRef, 12), ref Unsafe.Add(ref elemRef, 33));
Swap(ref Unsafe.Add(ref elemRef, 13), ref Unsafe.Add(ref elemRef, 41));
Swap(ref Unsafe.Add(ref elemRef, 14), ref Unsafe.Add(ref elemRef, 49));
Swap(ref Unsafe.Add(ref elemRef, 15), ref Unsafe.Add(ref elemRef, 57));
// row #2
Swap(ref Unsafe.Add(ref elemRef, 19), ref Unsafe.Add(ref elemRef, 26));
Swap(ref Unsafe.Add(ref elemRef, 20), ref Unsafe.Add(ref elemRef, 34));
Swap(ref Unsafe.Add(ref elemRef, 21), ref Unsafe.Add(ref elemRef, 42));
Swap(ref Unsafe.Add(ref elemRef, 22), ref Unsafe.Add(ref elemRef, 50));
Swap(ref Unsafe.Add(ref elemRef, 23), ref Unsafe.Add(ref elemRef, 58));
// row #3
Swap(ref Unsafe.Add(ref elemRef, 28), ref Unsafe.Add(ref elemRef, 35));
Swap(ref Unsafe.Add(ref elemRef, 29), ref Unsafe.Add(ref elemRef, 43));
Swap(ref Unsafe.Add(ref elemRef, 30), ref Unsafe.Add(ref elemRef, 51));
Swap(ref Unsafe.Add(ref elemRef, 31), ref Unsafe.Add(ref elemRef, 59));
// row #4
Swap(ref Unsafe.Add(ref elemRef, 37), ref Unsafe.Add(ref elemRef, 44));
Swap(ref Unsafe.Add(ref elemRef, 38), ref Unsafe.Add(ref elemRef, 52));
Swap(ref Unsafe.Add(ref elemRef, 39), ref Unsafe.Add(ref elemRef, 60));
// row #5
Swap(ref Unsafe.Add(ref elemRef, 46), ref Unsafe.Add(ref elemRef, 53));
Swap(ref Unsafe.Add(ref elemRef, 47), ref Unsafe.Add(ref elemRef, 61));
// row #6
Swap(ref Unsafe.Add(ref elemRef, 55), ref Unsafe.Add(ref elemRef, 62));
static void Swap(ref short a, ref short b)
{
short tmp = a;
a = b;
b = tmp;
}
}
/// <summary>
/// Calculate the total sum of absolute differences of elements in 'a' and 'b'.
/// </summary>

11
src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs

@ -151,6 +151,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if (this.componentsCount == this.frame.ComponentCount)
{
this.ParseBaselineDataInterleaved();
this.spectralConverter.CommitConversion();
}
else
{
@ -501,7 +502,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
i += r;
s = buffer.Receive(s);
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i++]) = (short)s;
Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[i++]) = (short)s;
}
else
{
@ -570,7 +571,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if (s != 0)
{
s = buffer.Receive(s);
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i]) = (short)(s << low);
Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[i]) = (short)(s << low);
}
else
{
@ -646,7 +647,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
do
{
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]);
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[k]);
if (coef != 0)
{
buffer.CheckBits();
@ -672,7 +673,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if ((s != 0) && (k < 64))
{
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]) = (short)s;
Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[k]) = (short)s;
}
}
}
@ -681,7 +682,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
for (; k <= end; k++)
{
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]);
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[k]);
if (coef != 0)
{

8
src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs

@ -18,11 +18,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// </summary>
public Block8x8F SourceBlock;
/// <summary>
/// Temporal block to store intermediate computation results.
/// </summary>
public Block8x8F WorkspaceBlock;
/// <summary>
/// The quantization table as <see cref="Block8x8F"/>.
/// </summary>
@ -45,7 +40,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
this.subSamplingDivisors = component.SubSamplingDivisors;
this.SourceBlock = default;
this.WorkspaceBlock = default;
}
/// <summary>
@ -71,7 +65,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
// Dequantize:
block.MultiplyInPlace(ref this.DequantiazationTable);
FastFloatingPointDCT.TransformIDCT(ref block, ref this.WorkspaceBlock);
FastFloatingPointDCT.TransformIDCT(ref block);
// To conform better to libjpeg we actually NEED TO loose precision here.
// This is because they store blocks as Int16 between all the operations.

19
src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs

@ -13,6 +13,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// </remarks>
internal abstract class SpectralConverter
{
/// <summary>
/// Gets a value indicating whether this converter has converted spectral
/// data of the current image or not.
/// </summary>
protected bool Converted { get; private set; }
/// <summary>
/// Injects jpeg image decoding metadata.
/// </summary>
@ -33,6 +39,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// </remarks>
public abstract void ConvertStrideBaseline();
/// <summary>
/// Marks current converter state as 'converted'.
/// </summary>
/// <remarks>
/// This must be called only for baseline interleaved jpeg's.
/// </remarks>
public void CommitConversion()
{
DebugGuard.IsFalse(this.Converted, nameof(this.Converted), $"{nameof(this.CommitConversion)} must be called only once");
this.Converted = true;
}
/// <summary>
/// Gets the color converter.
/// </summary>

28
src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs

@ -3,6 +3,7 @@
using System;
using System.Buffers;
using System.Linq;
using System.Numerics;
using System.Threading;
using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters;
@ -29,8 +30,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
private Buffer2D<TPixel> pixelBuffer;
private int blockRowsPerStep;
private int pixelRowsPerStep;
private int pixelRowCounter;
@ -41,8 +40,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
this.cancellationToken = cancellationToken;
}
private bool Converted => this.pixelRowCounter >= this.pixelBuffer.Height;
public Buffer2D<TPixel> GetPixelBuffer()
{
if (!this.Converted)
@ -52,7 +49,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
for (int step = 0; step < steps; step++)
{
this.cancellationToken.ThrowIfCancellationRequested();
this.ConvertNextStride(step);
this.ConvertStride(step);
}
}
@ -65,18 +62,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
MemoryAllocator allocator = this.configuration.MemoryAllocator;
// iteration data
IJpegComponent c0 = frame.Components[0];
int majorBlockWidth = frame.Components.Max((component) => component.SizeInBlocks.Width);
int majorVerticalSamplingFactor = frame.Components.Max((component) => component.SamplingFactors.Height);
const int blockPixelHeight = 8;
this.blockRowsPerStep = c0.SamplingFactors.Height;
this.pixelRowsPerStep = this.blockRowsPerStep * blockPixelHeight;
this.pixelRowsPerStep = majorVerticalSamplingFactor * blockPixelHeight;
// pixel buffer for resulting image
this.pixelBuffer = allocator.Allocate2D<TPixel>(frame.PixelWidth, frame.PixelHeight);
this.paddedProxyPixelRow = allocator.Allocate<TPixel>(frame.PixelWidth + 3);
// component processors from spectral to Rgba32
var postProcessorBufferSize = new Size(c0.SizeInBlocks.Width * 8, this.pixelRowsPerStep);
const int blockPixelWidth = 8;
var postProcessorBufferSize = new Size(majorBlockWidth * blockPixelWidth, this.pixelRowsPerStep);
this.componentProcessors = new JpegComponentPostProcessor[frame.Components.Length];
for (int i = 0; i < this.componentProcessors.Length; i++)
{
@ -84,7 +82,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
}
// single 'stride' rgba32 buffer for conversion between spectral and TPixel
// this.rgbaBuffer = allocator.Allocate<Vector4>(frame.PixelWidth);
this.rgbBuffer = allocator.Allocate<byte>(frame.PixelWidth * 3);
// color converter from Rgba32 to TPixel
@ -95,18 +92,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
public override void ConvertStrideBaseline()
{
// Convert next pixel stride using single spectral `stride'
// Note that zero passing eliminates the need of virtual call from JpegComponentPostProcessor
this.ConvertNextStride(spectralStep: 0);
// Note that zero passing eliminates the need of virtual call
// from JpegComponentPostProcessor
this.ConvertStride(spectralStep: 0);
// Clear spectral stride - this is VERY important as jpeg possibly won't fill entire buffer each stride
// Which leads to decoding artifacts
// Note that this code clears all buffers of the post processors, it's their responsibility to allocate only single stride
foreach (JpegComponentPostProcessor cpp in this.componentProcessors)
{
cpp.ClearSpectralBuffers();
}
}
/// <inheritdoc/>
public void Dispose()
{
if (this.componentProcessors != null)
@ -121,7 +117,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
this.paddedProxyPixelRow?.Dispose();
}
private void ConvertNextStride(int spectralStep)
private void ConvertStride(int spectralStep)
{
int maxY = Math.Min(this.pixelBuffer.Height, this.pixelRowCounter + this.pixelRowsPerStep);

237
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs

@ -2,9 +2,6 @@
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
@ -12,149 +9,147 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal static partial class FastFloatingPointDCT
{
#pragma warning disable SA1310, SA1311, IDE1006 // naming rules violation warnings
#pragma warning disable SA1310, SA1311, IDE1006 // naming rule violation warnings
private static readonly Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
private static readonly Vector256<float> mm256_F_0_3826 = Vector256.Create(0.382683433f);
private static readonly Vector256<float> mm256_F_0_5411 = Vector256.Create(0.541196100f);
private static readonly Vector256<float> mm256_F_1_3065 = Vector256.Create(1.306562965f);
private static readonly Vector256<float> mm256_F_1_1758 = Vector256.Create(1.175876f);
private static readonly Vector256<float> mm256_F_n1_9615 = Vector256.Create(-1.961570560f);
private static readonly Vector256<float> mm256_F_n0_3901 = Vector256.Create(-0.390180644f);
private static readonly Vector256<float> mm256_F_n0_8999 = Vector256.Create(-0.899976223f);
private static readonly Vector256<float> mm256_F_n2_5629 = Vector256.Create(-2.562915447f);
private static readonly Vector256<float> mm256_F_0_2986 = Vector256.Create(0.298631336f);
private static readonly Vector256<float> mm256_F_2_0531 = Vector256.Create(2.053119869f);
private static readonly Vector256<float> mm256_F_3_0727 = Vector256.Create(3.072711026f);
private static readonly Vector256<float> mm256_F_1_5013 = Vector256.Create(1.501321110f);
private static readonly Vector256<float> mm256_F_n1_8477 = Vector256.Create(-1.847759065f);
private static readonly Vector256<float> mm256_F_0_7653 = Vector256.Create(0.765366865f);
private static readonly Vector256<float> mm256_F_1_4142 = Vector256.Create(1.414213562f);
private static readonly Vector256<float> mm256_F_1_8477 = Vector256.Create(1.847759065f);
private static readonly Vector256<float> mm256_F_n1_0823 = Vector256.Create(-1.082392200f);
private static readonly Vector256<float> mm256_F_n2_6131 = Vector256.Create(-2.613125930f);
#pragma warning restore SA1310, SA1311, IDE1006
/// <summary>
/// Apply floating point FDCT inplace using simd operations.
/// </summary>
/// <param name="block">Input matrix.</param>
private static void ForwardTransform_Avx(ref Block8x8F block)
/// <param name="block">Input block.</param>
private static void FDCT8x8_Avx(ref Block8x8F block)
{
DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation.");
// First pass - process rows
block.TransposeInplace();
FDCT8x8_Avx(ref block);
FDCT8x8_1D_Avx(ref block);
// Second pass - process columns
block.TransposeInplace();
FDCT8x8_Avx(ref block);
FDCT8x8_1D_Avx(ref block);
// Applies 1D floating point FDCT inplace
static void FDCT8x8_1D_Avx(ref Block8x8F block)
{
Vector256<float> tmp0 = Avx.Add(block.V0, block.V7);
Vector256<float> tmp7 = Avx.Subtract(block.V0, block.V7);
Vector256<float> tmp1 = Avx.Add(block.V1, block.V6);
Vector256<float> tmp6 = Avx.Subtract(block.V1, block.V6);
Vector256<float> tmp2 = Avx.Add(block.V2, block.V5);
Vector256<float> tmp5 = Avx.Subtract(block.V2, block.V5);
Vector256<float> tmp3 = Avx.Add(block.V3, block.V4);
Vector256<float> tmp4 = Avx.Subtract(block.V3, block.V4);
// Even part
Vector256<float> tmp10 = Avx.Add(tmp0, tmp3);
Vector256<float> tmp13 = Avx.Subtract(tmp0, tmp3);
Vector256<float> tmp11 = Avx.Add(tmp1, tmp2);
Vector256<float> tmp12 = Avx.Subtract(tmp1, tmp2);
block.V0 = Avx.Add(tmp10, tmp11);
block.V4 = Avx.Subtract(tmp10, tmp11);
Vector256<float> z1 = Avx.Multiply(Avx.Add(tmp12, tmp13), mm256_F_0_7071);
block.V2 = Avx.Add(tmp13, z1);
block.V6 = Avx.Subtract(tmp13, z1);
// Odd part
tmp10 = Avx.Add(tmp4, tmp5);
tmp11 = Avx.Add(tmp5, tmp6);
tmp12 = Avx.Add(tmp6, tmp7);
Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), mm256_F_0_3826);
Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_0_5411, tmp10);
Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_1_3065, tmp12);
Vector256<float> z3 = Avx.Multiply(tmp11, mm256_F_0_7071);
Vector256<float> z11 = Avx.Add(tmp7, z3);
Vector256<float> z13 = Avx.Subtract(tmp7, z3);
block.V5 = Avx.Add(z13, z2);
block.V3 = Avx.Subtract(z13, z2);
block.V1 = Avx.Add(z11, z4);
block.V7 = Avx.Subtract(z11, z4);
}
}
/// <summary>
/// Apply 1D floating point FDCT inplace using AVX operations on 8x8 matrix.
/// Apply floating point IDCT inplace using simd operations.
/// </summary>
/// <remarks>
/// Requires Avx support.
/// </remarks>
/// <param name="block">Input matrix.</param>
public static void FDCT8x8_Avx(ref Block8x8F block)
/// <param name="transposedBlock">Transposed input block.</param>
private static void IDCT8x8_Avx(ref Block8x8F transposedBlock)
{
DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation.");
Vector256<float> tmp0 = Avx.Add(block.V0, block.V7);
Vector256<float> tmp7 = Avx.Subtract(block.V0, block.V7);
Vector256<float> tmp1 = Avx.Add(block.V1, block.V6);
Vector256<float> tmp6 = Avx.Subtract(block.V1, block.V6);
Vector256<float> tmp2 = Avx.Add(block.V2, block.V5);
Vector256<float> tmp5 = Avx.Subtract(block.V2, block.V5);
Vector256<float> tmp3 = Avx.Add(block.V3, block.V4);
Vector256<float> tmp4 = Avx.Subtract(block.V3, block.V4);
// Even part
Vector256<float> tmp10 = Avx.Add(tmp0, tmp3);
Vector256<float> tmp13 = Avx.Subtract(tmp0, tmp3);
Vector256<float> tmp11 = Avx.Add(tmp1, tmp2);
Vector256<float> tmp12 = Avx.Subtract(tmp1, tmp2);
block.V0 = Avx.Add(tmp10, tmp11);
block.V4 = Avx.Subtract(tmp10, tmp11);
Vector256<float> z1 = Avx.Multiply(Avx.Add(tmp12, tmp13), mm256_F_0_7071);
block.V2 = Avx.Add(tmp13, z1);
block.V6 = Avx.Subtract(tmp13, z1);
// Odd part
tmp10 = Avx.Add(tmp4, tmp5);
tmp11 = Avx.Add(tmp5, tmp6);
tmp12 = Avx.Add(tmp6, tmp7);
Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), mm256_F_0_3826);
Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_0_5411, tmp10);
Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_1_3065, tmp12);
Vector256<float> z3 = Avx.Multiply(tmp11, mm256_F_0_7071);
Vector256<float> z11 = Avx.Add(tmp7, z3);
Vector256<float> z13 = Avx.Subtract(tmp7, z3);
block.V5 = Avx.Add(z13, z2);
block.V3 = Avx.Subtract(z13, z2);
block.V1 = Avx.Add(z11, z4);
block.V7 = Avx.Subtract(z11, z4);
}
/// <summary>
/// Combined operation of <see cref="IDCT8x4_LeftPart(ref Block8x8F, ref Block8x8F)"/> and <see cref="IDCT8x4_RightPart(ref Block8x8F, ref Block8x8F)"/>
/// using AVX commands.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void IDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d)
{
Debug.Assert(Avx.IsSupported, "AVX is required to execute this method");
Vector256<float> my1 = s.V1;
Vector256<float> my7 = s.V7;
Vector256<float> mz0 = Avx.Add(my1, my7);
Vector256<float> my3 = s.V3;
Vector256<float> mz2 = Avx.Add(my3, my7);
Vector256<float> my5 = s.V5;
Vector256<float> mz1 = Avx.Add(my3, my5);
Vector256<float> mz3 = Avx.Add(my1, my5);
Vector256<float> mz4 = Avx.Multiply(Avx.Add(mz0, mz1), mm256_F_1_1758);
mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz2, mm256_F_n1_9615);
mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz3, mm256_F_n0_3901);
mz0 = Avx.Multiply(mz0, mm256_F_n0_8999);
mz1 = Avx.Multiply(mz1, mm256_F_n2_5629);
Vector256<float> mb3 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my7, mm256_F_0_2986), mz2);
Vector256<float> mb2 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my5, mm256_F_2_0531), mz3);
Vector256<float> mb1 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my3, mm256_F_3_0727), mz2);
Vector256<float> mb0 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my1, mm256_F_1_5013), mz3);
Vector256<float> my2 = s.V2;
Vector256<float> my6 = s.V6;
mz4 = Avx.Multiply(Avx.Add(my2, my6), mm256_F_0_5411);
Vector256<float> my0 = s.V0;
Vector256<float> my4 = s.V4;
mz0 = Avx.Add(my0, my4);
mz1 = Avx.Subtract(my0, my4);
mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my6, mm256_F_n1_8477);
mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my2, mm256_F_0_7653);
my0 = Avx.Add(mz0, mz3);
my3 = Avx.Subtract(mz0, mz3);
my1 = Avx.Add(mz1, mz2);
my2 = Avx.Subtract(mz1, mz2);
d.V0 = Avx.Add(my0, mb0);
d.V7 = Avx.Subtract(my0, mb0);
d.V1 = Avx.Add(my1, mb1);
d.V6 = Avx.Subtract(my1, mb1);
d.V2 = Avx.Add(my2, mb2);
d.V5 = Avx.Subtract(my2, mb2);
d.V3 = Avx.Add(my3, mb3);
d.V4 = Avx.Subtract(my3, mb3);
// First pass - process columns
IDCT8x8_1D_Avx(ref transposedBlock);
// Second pass - process rows
transposedBlock.TransposeInplace();
IDCT8x8_1D_Avx(ref transposedBlock);
// Applies 1D floating point FDCT inplace
static void IDCT8x8_1D_Avx(ref Block8x8F block)
{
// Even part
Vector256<float> tmp0 = block.V0;
Vector256<float> tmp1 = block.V2;
Vector256<float> tmp2 = block.V4;
Vector256<float> tmp3 = block.V6;
Vector256<float> z5 = tmp0;
Vector256<float> tmp10 = Avx.Add(z5, tmp2);
Vector256<float> tmp11 = Avx.Subtract(z5, tmp2);
Vector256<float> tmp13 = Avx.Add(tmp1, tmp3);
Vector256<float> tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142);
tmp0 = Avx.Add(tmp10, tmp13);
tmp3 = Avx.Subtract(tmp10, tmp13);
tmp1 = Avx.Add(tmp11, tmp12);
tmp2 = Avx.Subtract(tmp11, tmp12);
// Odd part
Vector256<float> tmp4 = block.V1;
Vector256<float> tmp5 = block.V3;
Vector256<float> tmp6 = block.V5;
Vector256<float> tmp7 = block.V7;
Vector256<float> z13 = Avx.Add(tmp6, tmp5);
Vector256<float> z10 = Avx.Subtract(tmp6, tmp5);
Vector256<float> z11 = Avx.Add(tmp4, tmp7);
Vector256<float> z12 = Avx.Subtract(tmp4, tmp7);
tmp7 = Avx.Add(z11, z13);
tmp11 = Avx.Multiply(Avx.Subtract(z11, z13), mm256_F_1_4142);
z5 = Avx.Multiply(Avx.Add(z10, z12), mm256_F_1_8477);
tmp10 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z12, mm256_F_n1_0823);
tmp12 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z10, mm256_F_n2_6131);
tmp6 = Avx.Subtract(tmp12, tmp7);
tmp5 = Avx.Subtract(tmp11, tmp6);
tmp4 = Avx.Subtract(tmp10, tmp5);
block.V0 = Avx.Add(tmp0, tmp7);
block.V7 = Avx.Subtract(tmp0, tmp7);
block.V1 = Avx.Add(tmp1, tmp6);
block.V6 = Avx.Subtract(tmp1, tmp6);
block.V2 = Avx.Add(tmp2, tmp5);
block.V5 = Avx.Subtract(tmp2, tmp5);
block.V3 = Avx.Add(tmp3, tmp4);
block.V4 = Avx.Subtract(tmp3, tmp4);
}
}
}
}

532
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

@ -3,6 +3,7 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics.X86;
#endif
@ -15,102 +16,202 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// </summary>
internal static partial class FastFloatingPointDCT
{
#pragma warning disable SA1310 // FieldNamesMustNotContainUnderscore
private const float C_1_175876 = 1.175875602f;
private const float C_1_961571 = -1.961570560f;
private const float C_0_390181 = -0.390180644f;
private const float C_0_899976 = -0.899976223f;
private const float C_2_562915 = -2.562915447f;
private const float C_0_298631 = 0.298631336f;
private const float C_2_053120 = 2.053119869f;
private const float C_3_072711 = 3.072711026f;
private const float C_1_501321 = 1.501321110f;
private const float C_0_541196 = 0.541196100f;
private const float C_1_847759 = -1.847759065f;
private const float C_0_765367 = 0.765366865f;
private const float C_0_125 = 0.1250f;
#pragma warning disable SA1311, IDE1006 // naming rules violation warnings
private static readonly Vector4 mm128_F_0_7071 = new Vector4(0.707106781f);
private static readonly Vector4 mm128_F_0_3826 = new Vector4(0.382683433f);
private static readonly Vector4 mm128_F_0_5411 = new Vector4(0.541196100f);
private static readonly Vector4 mm128_F_1_3065 = new Vector4(1.306562965f);
#pragma warning restore SA1311, IDE1006
#pragma warning restore SA1310 // FieldNamesMustNotContainUnderscore
#pragma warning disable SA1310, SA1311, IDE1006 // naming rules violation warnings
private static readonly Vector4 mm128_F_0_7071 = new(0.707106781f);
private static readonly Vector4 mm128_F_0_3826 = new(0.382683433f);
private static readonly Vector4 mm128_F_0_5411 = new(0.541196100f);
private static readonly Vector4 mm128_F_1_3065 = new(1.306562965f);
private static readonly Vector4 mm128_F_1_4142 = new(1.414213562f);
private static readonly Vector4 mm128_F_1_8477 = new(1.847759065f);
private static readonly Vector4 mm128_F_n1_0823 = new(-1.082392200f);
private static readonly Vector4 mm128_F_n2_6131 = new(-2.613125930f);
#pragma warning restore SA1310, SA1311, IDE1006
/// <summary>
/// Gets reciprocal coefficients for jpeg quantization tables calculation.
/// Gets adjustment table for quantization tables.
/// </summary>
/// <remarks>
/// <para>
/// Current FDCT implementation expects its results to be multiplied by
/// a reciprocal quantization table. To get 8x8 reciprocal block values in this
/// table must be divided by quantization table values scaled with quality settings.
/// Current IDCT and FDCT implementations are based on Arai, Agui,
/// and Nakajima's algorithm. Both DCT methods does not
/// produce finished DCT output, final step is fused into the
/// quantization step. Quantization and de-quantization coefficients
/// must be multiplied by these values.
/// </para>
/// <para>
/// These values were calculates with this formula:
/// <code>
/// value[row * 8 + col] = scalefactor[row] * scalefactor[col] * 8;
/// </code>
/// Where:
/// Given values were generated by formula:
/// <code>
/// scalefactor[row] * scalefactor[col], where
/// scalefactor[0] = 1
/// </code>
/// <code>
/// scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
/// </code>
/// Values are also scaled by 8 so DCT code won't do extra division/multiplication.
/// </para>
/// </remarks>
internal static readonly float[] DctReciprocalAdjustmentCoefficients = new float[]
private static readonly float[] AdjustmentCoefficients = new float[]
{
0.125f, 0.09011998f, 0.09567086f, 0.10630376f, 0.125f, 0.15909483f, 0.23096988f, 0.45306373f,
0.09011998f, 0.064972885f, 0.068974845f, 0.07664074f, 0.09011998f, 0.11470097f, 0.16652f, 0.32664075f,
0.09567086f, 0.068974845f, 0.07322331f, 0.081361376f, 0.09567086f, 0.121765904f, 0.17677669f, 0.34675997f,
0.10630376f, 0.07664074f, 0.081361376f, 0.09040392f, 0.10630376f, 0.13529903f, 0.19642374f, 0.38529903f,
0.125f, 0.09011998f, 0.09567086f, 0.10630376f, 0.125f, 0.15909483f, 0.23096988f, 0.45306373f,
0.15909483f, 0.11470097f, 0.121765904f, 0.13529903f, 0.15909483f, 0.2024893f, 0.2939689f, 0.5766407f,
0.23096988f, 0.16652f, 0.17677669f, 0.19642374f, 0.23096988f, 0.2939689f, 0.4267767f, 0.8371526f,
0.45306373f, 0.32664075f, 0.34675997f, 0.38529903f, 0.45306373f, 0.5766407f, 0.8371526f, 1.642134f,
1f, 1.3870399f, 1.306563f, 1.1758755f, 1f, 0.78569496f, 0.5411961f, 0.27589938f,
1.3870399f, 1.9238797f, 1.812255f, 1.6309863f, 1.3870399f, 1.0897902f, 0.7506606f, 0.38268346f,
1.306563f, 1.812255f, 1.707107f, 1.5363555f, 1.306563f, 1.02656f, 0.7071068f, 0.36047992f,
1.1758755f, 1.6309863f, 1.5363555f, 1.3826833f, 1.1758755f, 0.9238795f, 0.63637924f, 0.32442334f,
1f, 1.3870399f, 1.306563f, 1.1758755f, 1f, 0.78569496f, 0.5411961f, 0.27589938f,
0.78569496f, 1.0897902f, 1.02656f, 0.9238795f, 0.78569496f, 0.61731654f, 0.42521507f, 0.21677275f,
0.5411961f, 0.7506606f, 0.7071068f, 0.63637924f, 0.5411961f, 0.42521507f, 0.29289323f, 0.14931567f,
0.27589938f, 0.38268346f, 0.36047992f, 0.32442334f, 0.27589938f, 0.21677275f, 0.14931567f, 0.076120466f,
};
/// <summary>
/// Adjusts given quantization table to be complient with FDCT implementation.
/// Adjusts given quantization table for usage with <see cref="TransformIDCT"/>.
/// </summary>
/// <param name="quantTable">Quantization table to adjust.</param>
public static void AdjustToIDCT(ref Block8x8F quantTable)
{
ref float tableRef = ref Unsafe.As<Block8x8F, float>(ref quantTable);
ref float multipliersRef = ref MemoryMarshal.GetReference<float>(AdjustmentCoefficients);
for (nint i = 0; i < Block8x8F.Size; i++)
{
tableRef = 0.125f * tableRef * Unsafe.Add(ref multipliersRef, i);
tableRef = ref Unsafe.Add(ref tableRef, 1);
}
// Spectral macroblocks are transposed before quantization
// so we must transpose quantization table
quantTable.TransposeInplace();
}
/// <summary>
/// Adjusts given quantization table for usage with <see cref="TransformFDCT"/>.
/// </summary>
/// <param name="quantTable">Quantization table to adjust.</param>
public static void AdjustToFDCT(ref Block8x8F quantTable)
{
ref float tableRef = ref Unsafe.As<Block8x8F, float>(ref quantTable);
ref float multipliersRef = ref MemoryMarshal.GetReference<float>(AdjustmentCoefficients);
for (nint i = 0; i < Block8x8F.Size; i++)
{
tableRef = 0.125f / (tableRef * Unsafe.Add(ref multipliersRef, i));
tableRef = ref Unsafe.Add(ref tableRef, 1);
}
}
/// <summary>
/// Apply 2D floating point IDCT inplace.
/// </summary>
/// <remarks>
/// See <see cref="DctReciprocalAdjustmentCoefficients"/> docs for explanation.
/// Input block must be dequantized before this method with table
/// adjusted by <see cref="AdjustToIDCT"/>.
/// </remarks>
/// <param name="quantizationtable">Quantization table to adjust.</param>
public static void AdjustToFDCT(ref Block8x8F quantizationtable)
/// <param name="block">Input block.</param>
public static void TransformIDCT(ref Block8x8F block)
{
for (int i = 0; i < Block8x8F.Size; i++)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
quantizationtable[i] = DctReciprocalAdjustmentCoefficients[i] / quantizationtable[i];
IDCT8x8_Avx(ref block);
}
else
#endif
{
IDCT_Vector4(ref block);
}
}
/// <summary>
/// Apply 2D floating point FDCT inplace.
/// Apply 2D floating point IDCT inplace.
/// </summary>
/// <param name="block">Input matrix.</param>
/// <remarks>
/// Input block must be quantized after this method with table adjusted
/// by <see cref="AdjustToFDCT"/>.
/// </remarks>
/// <param name="block">Input block.</param>
public static void TransformFDCT(ref Block8x8F block)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
ForwardTransform_Avx(ref block);
FDCT8x8_Avx(ref block);
}
else
#endif
if (Vector.IsHardwareAccelerated)
{
ForwardTransform_Vector4(ref block);
FDCT_Vector4(ref block);
}
else
{
ForwardTransform_Scalar(ref block);
FDCT_Scalar(ref block);
}
}
/// <summary>
/// Apply floating point IDCT inplace using <see cref="Vector4"/> API.
/// </summary>
/// <param name="transposedBlock">Input block.</param>
private static void IDCT_Vector4(ref Block8x8F transposedBlock)
{
DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware.");
// First pass - process columns
IDCT8x4_Vector4(ref transposedBlock.V0L);
IDCT8x4_Vector4(ref transposedBlock.V0R);
// Second pass - process rows
transposedBlock.TransposeInplace();
IDCT8x4_Vector4(ref transposedBlock.V0L);
IDCT8x4_Vector4(ref transposedBlock.V0R);
// Applies 1D floating point IDCT inplace on 8x4 part of 8x8 block
static void IDCT8x4_Vector4(ref Vector4 vecRef)
{
// Even part
Vector4 tmp0 = Unsafe.Add(ref vecRef, 0 * 2);
Vector4 tmp1 = Unsafe.Add(ref vecRef, 2 * 2);
Vector4 tmp2 = Unsafe.Add(ref vecRef, 4 * 2);
Vector4 tmp3 = Unsafe.Add(ref vecRef, 6 * 2);
Vector4 z5 = tmp0;
Vector4 tmp10 = z5 + tmp2;
Vector4 tmp11 = z5 - tmp2;
Vector4 tmp13 = tmp1 + tmp3;
Vector4 tmp12 = ((tmp1 - tmp3) * mm128_F_1_4142) - tmp13;
tmp0 = tmp10 + tmp13;
tmp3 = tmp10 - tmp13;
tmp1 = tmp11 + tmp12;
tmp2 = tmp11 - tmp12;
// Odd part
Vector4 tmp4 = Unsafe.Add(ref vecRef, 1 * 2);
Vector4 tmp5 = Unsafe.Add(ref vecRef, 3 * 2);
Vector4 tmp6 = Unsafe.Add(ref vecRef, 5 * 2);
Vector4 tmp7 = Unsafe.Add(ref vecRef, 7 * 2);
Vector4 z13 = tmp6 + tmp5;
Vector4 z10 = tmp6 - tmp5;
Vector4 z11 = tmp4 + tmp7;
Vector4 z12 = tmp4 - tmp7;
tmp7 = z11 + z13;
tmp11 = (z11 - z13) * mm128_F_1_4142;
z5 = (z10 + z12) * mm128_F_1_8477;
tmp10 = (z12 * mm128_F_n1_0823) + z5;
tmp12 = (z10 * mm128_F_n2_6131) + z5;
tmp6 = tmp12 - tmp7;
tmp5 = tmp11 - tmp6;
tmp4 = tmp10 - tmp5;
Unsafe.Add(ref vecRef, 0 * 2) = tmp0 + tmp7;
Unsafe.Add(ref vecRef, 7 * 2) = tmp0 - tmp7;
Unsafe.Add(ref vecRef, 1 * 2) = tmp1 + tmp6;
Unsafe.Add(ref vecRef, 6 * 2) = tmp1 - tmp6;
Unsafe.Add(ref vecRef, 2 * 2) = tmp2 + tmp5;
Unsafe.Add(ref vecRef, 5 * 2) = tmp2 - tmp5;
Unsafe.Add(ref vecRef, 3 * 2) = tmp3 + tmp4;
Unsafe.Add(ref vecRef, 4 * 2) = tmp3 - tmp4;
}
}
@ -120,8 +221,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <remarks>
/// Ported from libjpeg-turbo https://github.com/libjpeg-turbo/libjpeg-turbo/blob/main/jfdctflt.c.
/// </remarks>
/// <param name="block">Input matrix.</param>
private static void ForwardTransform_Scalar(ref Block8x8F block)
/// <param name="block">Input block.</param>
private static void FDCT_Scalar(ref Block8x8F block)
{
const int dctSize = 8;
@ -130,17 +231,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
float z1, z2, z3, z4, z5, z11, z13;
// First pass - process rows
ref float dataRef = ref Unsafe.As<Block8x8F, float>(ref block);
ref float blockRef = ref Unsafe.As<Block8x8F, float>(ref block);
for (int ctr = 7; ctr >= 0; ctr--)
{
tmp0 = Unsafe.Add(ref dataRef, 0) + Unsafe.Add(ref dataRef, 7);
tmp7 = Unsafe.Add(ref dataRef, 0) - Unsafe.Add(ref dataRef, 7);
tmp1 = Unsafe.Add(ref dataRef, 1) + Unsafe.Add(ref dataRef, 6);
tmp6 = Unsafe.Add(ref dataRef, 1) - Unsafe.Add(ref dataRef, 6);
tmp2 = Unsafe.Add(ref dataRef, 2) + Unsafe.Add(ref dataRef, 5);
tmp5 = Unsafe.Add(ref dataRef, 2) - Unsafe.Add(ref dataRef, 5);
tmp3 = Unsafe.Add(ref dataRef, 3) + Unsafe.Add(ref dataRef, 4);
tmp4 = Unsafe.Add(ref dataRef, 3) - Unsafe.Add(ref dataRef, 4);
tmp0 = Unsafe.Add(ref blockRef, 0) + Unsafe.Add(ref blockRef, 7);
tmp7 = Unsafe.Add(ref blockRef, 0) - Unsafe.Add(ref blockRef, 7);
tmp1 = Unsafe.Add(ref blockRef, 1) + Unsafe.Add(ref blockRef, 6);
tmp6 = Unsafe.Add(ref blockRef, 1) - Unsafe.Add(ref blockRef, 6);
tmp2 = Unsafe.Add(ref blockRef, 2) + Unsafe.Add(ref blockRef, 5);
tmp5 = Unsafe.Add(ref blockRef, 2) - Unsafe.Add(ref blockRef, 5);
tmp3 = Unsafe.Add(ref blockRef, 3) + Unsafe.Add(ref blockRef, 4);
tmp4 = Unsafe.Add(ref blockRef, 3) - Unsafe.Add(ref blockRef, 4);
// Even part
tmp10 = tmp0 + tmp3;
@ -148,12 +249,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
Unsafe.Add(ref dataRef, 0) = tmp10 + tmp11;
Unsafe.Add(ref dataRef, 4) = tmp10 - tmp11;
Unsafe.Add(ref blockRef, 0) = tmp10 + tmp11;
Unsafe.Add(ref blockRef, 4) = tmp10 - tmp11;
z1 = (tmp12 + tmp13) * 0.707106781f;
Unsafe.Add(ref dataRef, 2) = tmp13 + z1;
Unsafe.Add(ref dataRef, 6) = tmp13 - z1;
Unsafe.Add(ref blockRef, 2) = tmp13 + z1;
Unsafe.Add(ref blockRef, 6) = tmp13 - z1;
// Odd part
tmp10 = tmp4 + tmp5;
@ -168,26 +269,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
z11 = tmp7 + z3;
z13 = tmp7 - z3;
Unsafe.Add(ref dataRef, 5) = z13 + z2;
Unsafe.Add(ref dataRef, 3) = z13 - z2;
Unsafe.Add(ref dataRef, 1) = z11 + z4;
Unsafe.Add(ref dataRef, 7) = z11 - z4;
Unsafe.Add(ref blockRef, 5) = z13 + z2;
Unsafe.Add(ref blockRef, 3) = z13 - z2;
Unsafe.Add(ref blockRef, 1) = z11 + z4;
Unsafe.Add(ref blockRef, 7) = z11 - z4;
dataRef = ref Unsafe.Add(ref dataRef, dctSize);
blockRef = ref Unsafe.Add(ref blockRef, dctSize);
}
// Second pass - process columns
dataRef = ref Unsafe.As<Block8x8F, float>(ref block);
blockRef = ref Unsafe.As<Block8x8F, float>(ref block);
for (int ctr = 7; ctr >= 0; ctr--)
{
tmp0 = Unsafe.Add(ref dataRef, dctSize * 0) + Unsafe.Add(ref dataRef, dctSize * 7);
tmp7 = Unsafe.Add(ref dataRef, dctSize * 0) - Unsafe.Add(ref dataRef, dctSize * 7);
tmp1 = Unsafe.Add(ref dataRef, dctSize * 1) + Unsafe.Add(ref dataRef, dctSize * 6);
tmp6 = Unsafe.Add(ref dataRef, dctSize * 1) - Unsafe.Add(ref dataRef, dctSize * 6);
tmp2 = Unsafe.Add(ref dataRef, dctSize * 2) + Unsafe.Add(ref dataRef, dctSize * 5);
tmp5 = Unsafe.Add(ref dataRef, dctSize * 2) - Unsafe.Add(ref dataRef, dctSize * 5);
tmp3 = Unsafe.Add(ref dataRef, dctSize * 3) + Unsafe.Add(ref dataRef, dctSize * 4);
tmp4 = Unsafe.Add(ref dataRef, dctSize * 3) - Unsafe.Add(ref dataRef, dctSize * 4);
tmp0 = Unsafe.Add(ref blockRef, dctSize * 0) + Unsafe.Add(ref blockRef, dctSize * 7);
tmp7 = Unsafe.Add(ref blockRef, dctSize * 0) - Unsafe.Add(ref blockRef, dctSize * 7);
tmp1 = Unsafe.Add(ref blockRef, dctSize * 1) + Unsafe.Add(ref blockRef, dctSize * 6);
tmp6 = Unsafe.Add(ref blockRef, dctSize * 1) - Unsafe.Add(ref blockRef, dctSize * 6);
tmp2 = Unsafe.Add(ref blockRef, dctSize * 2) + Unsafe.Add(ref blockRef, dctSize * 5);
tmp5 = Unsafe.Add(ref blockRef, dctSize * 2) - Unsafe.Add(ref blockRef, dctSize * 5);
tmp3 = Unsafe.Add(ref blockRef, dctSize * 3) + Unsafe.Add(ref blockRef, dctSize * 4);
tmp4 = Unsafe.Add(ref blockRef, dctSize * 3) - Unsafe.Add(ref blockRef, dctSize * 4);
// Even part
tmp10 = tmp0 + tmp3;
@ -195,12 +296,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
Unsafe.Add(ref dataRef, dctSize * 0) = tmp10 + tmp11;
Unsafe.Add(ref dataRef, dctSize * 4) = tmp10 - tmp11;
Unsafe.Add(ref blockRef, dctSize * 0) = tmp10 + tmp11;
Unsafe.Add(ref blockRef, dctSize * 4) = tmp10 - tmp11;
z1 = (tmp12 + tmp13) * 0.707106781f;
Unsafe.Add(ref dataRef, dctSize * 2) = tmp13 + z1;
Unsafe.Add(ref dataRef, dctSize * 6) = tmp13 - z1;
Unsafe.Add(ref blockRef, dctSize * 2) = tmp13 + z1;
Unsafe.Add(ref blockRef, dctSize * 6) = tmp13 - z1;
// Odd part
tmp10 = tmp4 + tmp5;
@ -215,12 +316,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
z11 = tmp7 + z3;
z13 = tmp7 - z3;
Unsafe.Add(ref dataRef, dctSize * 5) = z13 + z2;
Unsafe.Add(ref dataRef, dctSize * 3) = z13 - z2;
Unsafe.Add(ref dataRef, dctSize * 1) = z11 + z4;
Unsafe.Add(ref dataRef, dctSize * 7) = z11 - z4;
Unsafe.Add(ref blockRef, dctSize * 5) = z13 + z2;
Unsafe.Add(ref blockRef, dctSize * 3) = z13 - z2;
Unsafe.Add(ref blockRef, dctSize * 1) = z11 + z4;
Unsafe.Add(ref blockRef, dctSize * 7) = z11 - z4;
dataRef = ref Unsafe.Add(ref dataRef, 1);
blockRef = ref Unsafe.Add(ref blockRef, 1);
}
}
@ -230,11 +331,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <remarks>
/// This implementation must be called only if hardware supports 4
/// floating point numbers vector. Otherwise explicit scalar
/// implementation <see cref="ForwardTransform_Scalar"/> is faster
/// because it does not rely on matrix transposition.
/// implementation <see cref="FDCT_Scalar"/> is faster
/// because it does not rely on block transposition.
/// </remarks>
/// <param name="block">Input matrix.</param>
private static void ForwardTransform_Vector4(ref Block8x8F block)
/// <param name="block">Input block.</param>
public static void FDCT_Vector4(ref Block8x8F block)
{
DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware.");
@ -247,209 +348,50 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
block.TransposeInplace();
FDCT8x4_Vector4(ref block.V0L);
FDCT8x4_Vector4(ref block.V0R);
}
/// <summary>
/// Apply 1D floating point FDCT inplace on 8x4 part of 8x8 matrix.
/// </summary>
/// <remarks>
/// Implemented using Vector4 API operations for either scalar or sse hardware implementation.
/// Must be called on both 8x4 matrix parts for the full FDCT transform.
/// </remarks>
/// <param name="blockRef">Input reference to the first </param>
private static void FDCT8x4_Vector4(ref Vector4 blockRef)
{
Vector4 tmp0 = Unsafe.Add(ref blockRef, 0) + Unsafe.Add(ref blockRef, 14);
Vector4 tmp7 = Unsafe.Add(ref blockRef, 0) - Unsafe.Add(ref blockRef, 14);
Vector4 tmp1 = Unsafe.Add(ref blockRef, 2) + Unsafe.Add(ref blockRef, 12);
Vector4 tmp6 = Unsafe.Add(ref blockRef, 2) - Unsafe.Add(ref blockRef, 12);
Vector4 tmp2 = Unsafe.Add(ref blockRef, 4) + Unsafe.Add(ref blockRef, 10);
Vector4 tmp5 = Unsafe.Add(ref blockRef, 4) - Unsafe.Add(ref blockRef, 10);
Vector4 tmp3 = Unsafe.Add(ref blockRef, 6) + Unsafe.Add(ref blockRef, 8);
Vector4 tmp4 = Unsafe.Add(ref blockRef, 6) - Unsafe.Add(ref blockRef, 8);
// Even part
Vector4 tmp10 = tmp0 + tmp3;
Vector4 tmp13 = tmp0 - tmp3;
Vector4 tmp11 = tmp1 + tmp2;
Vector4 tmp12 = tmp1 - tmp2;
Unsafe.Add(ref blockRef, 0) = tmp10 + tmp11;
Unsafe.Add(ref blockRef, 8) = tmp10 - tmp11;
Vector4 z1 = (tmp12 + tmp13) * mm128_F_0_7071;
Unsafe.Add(ref blockRef, 4) = tmp13 + z1;
Unsafe.Add(ref blockRef, 12) = tmp13 - z1;
// Odd part
tmp10 = tmp4 + tmp5;
tmp11 = tmp5 + tmp6;
tmp12 = tmp6 + tmp7;
Vector4 z5 = (tmp10 - tmp12) * mm128_F_0_3826;
Vector4 z2 = (mm128_F_0_5411 * tmp10) + z5;
Vector4 z4 = (mm128_F_1_3065 * tmp12) + z5;
Vector4 z3 = tmp11 * mm128_F_0_7071;
Vector4 z11 = tmp7 + z3;
Vector4 z13 = tmp7 - z3;
Unsafe.Add(ref blockRef, 10) = z13 + z2;
Unsafe.Add(ref blockRef, 6) = z13 - z2;
Unsafe.Add(ref blockRef, 2) = z11 + z4;
Unsafe.Add(ref blockRef, 14) = z11 - z4;
}
// Applies 1D floating point FDCT inplace on 8x4 part of 8x8 block
static void FDCT8x4_Vector4(ref Vector4 vecRef)
{
Vector4 tmp0 = Unsafe.Add(ref vecRef, 0) + Unsafe.Add(ref vecRef, 14);
Vector4 tmp7 = Unsafe.Add(ref vecRef, 0) - Unsafe.Add(ref vecRef, 14);
Vector4 tmp1 = Unsafe.Add(ref vecRef, 2) + Unsafe.Add(ref vecRef, 12);
Vector4 tmp6 = Unsafe.Add(ref vecRef, 2) - Unsafe.Add(ref vecRef, 12);
Vector4 tmp2 = Unsafe.Add(ref vecRef, 4) + Unsafe.Add(ref vecRef, 10);
Vector4 tmp5 = Unsafe.Add(ref vecRef, 4) - Unsafe.Add(ref vecRef, 10);
Vector4 tmp3 = Unsafe.Add(ref vecRef, 6) + Unsafe.Add(ref vecRef, 8);
Vector4 tmp4 = Unsafe.Add(ref vecRef, 6) - Unsafe.Add(ref vecRef, 8);
/// <summary>
/// Apply floating point IDCT inplace.
/// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239.
/// </summary>
/// <param name="block">Input matrix.</param>
/// <param name="temp">Matrix to store temporal results.</param>
public static void TransformIDCT(ref Block8x8F block, ref Block8x8F temp)
{
block.TransposeInplace();
IDCT8x8(ref block, ref temp);
temp.TransposeInplace();
IDCT8x8(ref temp, ref block);
// Even part
Vector4 tmp10 = tmp0 + tmp3;
Vector4 tmp13 = tmp0 - tmp3;
Vector4 tmp11 = tmp1 + tmp2;
Vector4 tmp12 = tmp1 - tmp2;
// TODO: This can be fused into quantization table step
block.MultiplyInPlace(C_0_125);
}
Unsafe.Add(ref vecRef, 0) = tmp10 + tmp11;
Unsafe.Add(ref vecRef, 8) = tmp10 - tmp11;
/// <summary>
/// Performs 8x8 matrix Inverse Discrete Cosine Transform
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
private static void IDCT8x8(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
IDCT8x8_Avx(ref s, ref d);
}
else
#endif
{
IDCT8x4_LeftPart(ref s, ref d);
IDCT8x4_RightPart(ref s, ref d);
}
}
Vector4 z1 = (tmp12 + tmp13) * mm128_F_0_7071;
Unsafe.Add(ref vecRef, 4) = tmp13 + z1;
Unsafe.Add(ref vecRef, 12) = tmp13 - z1;
/// <summary>
/// Do IDCT internal operations on the left part of the block. Original src:
/// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261
/// </summary>
/// <param name="s">The source block</param>
/// <param name="d">Destination block</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void IDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d)
{
Vector4 my1 = s.V1L;
Vector4 my7 = s.V7L;
Vector4 mz0 = my1 + my7;
Vector4 my3 = s.V3L;
Vector4 mz2 = my3 + my7;
Vector4 my5 = s.V5L;
Vector4 mz1 = my3 + my5;
Vector4 mz3 = my1 + my5;
Vector4 mz4 = (mz0 + mz1) * C_1_175876;
mz2 = (mz2 * C_1_961571) + mz4;
mz3 = (mz3 * C_0_390181) + mz4;
mz0 = mz0 * C_0_899976;
mz1 = mz1 * C_2_562915;
Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2;
Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3;
Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2;
Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3;
Vector4 my2 = s.V2L;
Vector4 my6 = s.V6L;
mz4 = (my2 + my6) * C_0_541196;
Vector4 my0 = s.V0L;
Vector4 my4 = s.V4L;
mz0 = my0 + my4;
mz1 = my0 - my4;
mz2 = mz4 + (my6 * C_1_847759);
mz3 = mz4 + (my2 * C_0_765367);
my0 = mz0 + mz3;
my3 = mz0 - mz3;
my1 = mz1 + mz2;
my2 = mz1 - mz2;
d.V0L = my0 + mb0;
d.V7L = my0 - mb0;
d.V1L = my1 + mb1;
d.V6L = my1 - mb1;
d.V2L = my2 + mb2;
d.V5L = my2 - mb2;
d.V3L = my3 + mb3;
d.V4L = my3 - mb3;
}
// Odd part
tmp10 = tmp4 + tmp5;
tmp11 = tmp5 + tmp6;
tmp12 = tmp6 + tmp7;
/// <summary>
/// Do IDCT internal operations on the right part of the block.
/// Original src:
/// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261
/// </summary>
/// <param name="s">The source block</param>
/// <param name="d">The destination block</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void IDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d)
{
Vector4 my1 = s.V1R;
Vector4 my7 = s.V7R;
Vector4 mz0 = my1 + my7;
Vector4 my3 = s.V3R;
Vector4 mz2 = my3 + my7;
Vector4 my5 = s.V5R;
Vector4 mz1 = my3 + my5;
Vector4 mz3 = my1 + my5;
Vector4 mz4 = (mz0 + mz1) * C_1_175876;
mz2 = (mz2 * C_1_961571) + mz4;
mz3 = (mz3 * C_0_390181) + mz4;
mz0 = mz0 * C_0_899976;
mz1 = mz1 * C_2_562915;
Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2;
Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3;
Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2;
Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3;
Vector4 my2 = s.V2R;
Vector4 my6 = s.V6R;
mz4 = (my2 + my6) * C_0_541196;
Vector4 my0 = s.V0R;
Vector4 my4 = s.V4R;
mz0 = my0 + my4;
mz1 = my0 - my4;
mz2 = mz4 + (my6 * C_1_847759);
mz3 = mz4 + (my2 * C_0_765367);
my0 = mz0 + mz3;
my3 = mz0 - mz3;
my1 = mz1 + mz2;
my2 = mz1 - mz2;
d.V0R = my0 + mb0;
d.V7R = my0 - mb0;
d.V1R = my1 + mb1;
d.V6R = my1 - mb1;
d.V2R = my2 + mb2;
d.V5R = my2 - mb2;
d.V3R = my3 + mb3;
d.V4R = my3 - mb3;
Vector4 z5 = (tmp10 - tmp12) * mm128_F_0_3826;
Vector4 z2 = (mm128_F_0_5411 * tmp10) + z5;
Vector4 z4 = (mm128_F_1_3065 * tmp12) + z5;
Vector4 z3 = tmp11 * mm128_F_0_7071;
Vector4 z11 = tmp7 + z3;
Vector4 z13 = tmp7 - z3;
Unsafe.Add(ref vecRef, 10) = z13 + z2;
Unsafe.Add(ref vecRef, 6) = z13 - z2;
Unsafe.Add(ref vecRef, 2) = z11 + z4;
Unsafe.Add(ref vecRef, 14) = z11 - z4;
}
}
}
}

29
src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs

@ -35,5 +35,34 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
63, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63
};
/// <summary>
/// Gets span of zig-zag with fused transpose step ordering indices.
/// </summary>
/// <remarks>
/// When reading corrupted data, the Huffman decoders could attempt
/// to reference an entry beyond the end of this array (if the decoded
/// zero run length reaches past the end of the block). To prevent
/// wild stores without adding an inner-loop test, we put some extra
/// "63"s after the real entries. This will cause the extra coefficient
/// to be stored in location 63 of the block, not somewhere random.
/// The worst case would be a run-length of 15, which means we need 16
/// fake entries.
/// </remarks>
public static ReadOnlySpan<byte> TransposingOrder => new byte[]
{
0, 8, 1, 2, 9, 16, 24, 17,
10, 3, 4, 11, 18, 25, 32, 40,
33, 26, 19, 12, 5, 6, 13, 20,
27, 34, 41, 48, 56, 49, 42, 35,
28, 21, 14, 7, 15, 22, 29, 36,
43, 50, 57, 58, 51, 44, 37, 30,
23, 31, 38, 45, 52, 59, 60, 53,
46, 39, 47, 54, 61, 62, 55, 63,
// Extra entries for safety in decoder
63, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63
};
}
}

3
src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs

@ -942,6 +942,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
break;
}
}
// Adjusting table for IDCT step during decompression
FastFloatingPointDCT.AdjustToIDCT(ref table);
}
}

4
src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs

@ -288,8 +288,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
/// <param name="componentCount">The number of components to write.</param>
private void WriteDefineHuffmanTables(int componentCount)
{
// This uses a C#'s compiler optimization that refers to the static data segment of the assembly,
// and doesn't incur any allocation at all.
// Table identifiers.
ReadOnlySpan<byte> headers = stackalloc byte[]
ReadOnlySpan<byte> headers = new byte[]
{
0x00,
0x10,

4
src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs

@ -256,8 +256,8 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Compressors
private void ResetTables()
{
this.children.GetSpan().Fill(0);
this.siblings.GetSpan().Fill(0);
this.children.GetSpan().Clear();
this.siblings.GetSpan().Clear();
this.bitsPerCode = MinBits;
this.maxCode = MaxValue(this.bitsPerCode);
this.nextValidCode = EoiCode + 1;

12
src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs

@ -64,7 +64,7 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors
uint bitsWritten = 0;
for (int y = 0; y < height; y++)
{
scanLine.Fill(0);
scanLine.Clear();
Decode2DScanline(bitReader, this.isWhiteZero, referenceScanLine, scanLine);
bitsWritten = this.WriteScanLine(buffer, scanLine, bitsWritten);
@ -116,7 +116,15 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors
{
// If a TIFF reader encounters EOFB before the expected number of lines has been extracted,
// it is appropriate to assume that the missing rows consist entirely of white pixels.
scanline.Fill(whiteIsZero ? (byte)0 : (byte)255);
if (whiteIsZero)
{
scanline.Clear();
}
else
{
scanline.Fill((byte)255);
}
break;
}

2
src/ImageSharp/Formats/Webp/EntropyIx.cs

@ -6,7 +6,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
/// <summary>
/// These five modes are evaluated and their respective entropy is computed.
/// </summary>
internal enum EntropyIx
internal enum EntropyIx : byte
{
Direct = 0,

2
src/ImageSharp/Formats/Webp/HistoIx.cs

@ -3,7 +3,7 @@
namespace SixLabors.ImageSharp.Formats.Webp
{
internal enum HistoIx
internal enum HistoIx : byte
{
HistoAlpha = 0,

49
src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs

@ -2,11 +2,13 @@
// Licensed under the Apache License, Version 2.0.
using System;
using System.Buffers;
using System.Collections.Generic;
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
internal class BackwardReferenceEncoder
internal static class BackwardReferenceEncoder
{
/// <summary>
/// Maximum bit length.
@ -41,6 +43,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int quality,
int lz77TypesToTry,
ref int cacheBits,
MemoryAllocator memoryAllocator,
Vp8LHashChain hashChain,
Vp8LBackwardRefs best,
Vp8LBackwardRefs worst)
@ -69,7 +72,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
BackwardReferencesLz77(width, height, bgra, 0, hashChain, worst);
break;
case Vp8LLz77Type.Lz77Box:
hashChainBox = new Vp8LHashChain(width * height);
hashChainBox = new Vp8LHashChain(memoryAllocator, width * height);
BackwardReferencesLz77Box(width, height, bgra, 0, hashChain, hashChainBox, worst);
break;
}
@ -100,7 +103,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if ((lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard || lz77TypeBest == (int)Vp8LLz77Type.Lz77Box) && quality >= 25)
{
Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox;
BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst);
BackwardReferencesTraceBackwards(width, height, memoryAllocator, bgra, cacheBits, hashChainTmp, best, worst);
var histo = new Vp8LHistogram(worst, cacheBits);
double bitCostTrace = histo.EstimateBits(stats, bitsEntropy);
if (bitCostTrace < bitCostBest)
@ -111,6 +114,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
BackwardReferences2DLocality(width, best);
hashChainBox?.Dispose();
return best;
}
@ -234,6 +239,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static void BackwardReferencesTraceBackwards(
int xSize,
int ySize,
MemoryAllocator memoryAllocator,
ReadOnlySpan<uint> bgra,
int cacheBits,
Vp8LHashChain hashChain,
@ -241,22 +247,24 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Vp8LBackwardRefs refsDst)
{
int distArraySize = xSize * ySize;
ushort[] distArray = new ushort[distArraySize];
using IMemoryOwner<ushort> distArrayBuffer = memoryAllocator.Allocate<ushort>(distArraySize);
Span<ushort> distArray = distArrayBuffer.GetSpan();
BackwardReferencesHashChainDistanceOnly(xSize, ySize, bgra, cacheBits, hashChain, refsSrc, distArray);
BackwardReferencesHashChainDistanceOnly(xSize, ySize, memoryAllocator, bgra, cacheBits, hashChain, refsSrc, distArrayBuffer);
int chosenPathSize = TraceBackwards(distArray, distArraySize);
Span<ushort> chosenPath = distArray.AsSpan(distArraySize - chosenPathSize);
Span<ushort> chosenPath = distArray.Slice(distArraySize - chosenPathSize);
BackwardReferencesHashChainFollowChosenPath(bgra, cacheBits, chosenPath, chosenPathSize, hashChain, refsDst);
}
private static void BackwardReferencesHashChainDistanceOnly(
int xSize,
int ySize,
MemoryAllocator memoryAllocator,
ReadOnlySpan<uint> bgra,
int cacheBits,
Vp8LHashChain hashChain,
Vp8LBackwardRefs refs,
ushort[] distArray)
IMemoryOwner<ushort> distArrayBuffer)
{
int pixCount = xSize * ySize;
bool useColorCache = cacheBits > 0;
@ -275,22 +283,24 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
costModel.Build(xSize, cacheBits, refs);
var costManager = new CostManager(distArray, pixCount, costModel);
using var costManager = new CostManager(memoryAllocator, distArrayBuffer, pixCount, costModel);
Span<float> costManagerCosts = costManager.Costs.GetSpan();
Span<ushort> distArray = distArrayBuffer.GetSpan();
// We loop one pixel at a time, but store all currently best points to non-processed locations from this point.
distArray[0] = 0;
// Add first pixel as literal.
AddSingleLiteralWithCostModel(bgra, colorCache, costModel, 0, useColorCache, 0.0f, costManager.Costs, distArray);
AddSingleLiteralWithCostModel(bgra, colorCache, costModel, 0, useColorCache, 0.0f, costManagerCosts, distArray);
for (int i = 1; i < pixCount; i++)
{
float prevCost = costManager.Costs[i - 1];
float prevCost = costManagerCosts[i - 1];
int offset = hashChain.FindOffset(i);
int len = hashChain.FindLength(i);
// Try adding the pixel as a literal.
AddSingleLiteralWithCostModel(bgra, colorCache, costModel, i, useColorCache, prevCost, costManager.Costs, distArray);
AddSingleLiteralWithCostModel(bgra, colorCache, costModel, i, useColorCache, prevCost, costManagerCosts, distArray);
// If we are dealing with a non-literal.
if (len >= 2)
@ -334,7 +344,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
costManager.UpdateCostAtIndex(j - 1, false);
costManager.UpdateCostAtIndex(j, false);
costManager.PushInterval(costManager.Costs[j - 1] + offsetCost, j, lenJ);
costManager.PushInterval(costManagerCosts[j - 1] + offsetCost, j, lenJ);
reach = j + lenJ - 1;
}
}
@ -346,7 +356,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
private static int TraceBackwards(ushort[] distArray, int distArraySize)
private static int TraceBackwards(Span<ushort> distArray, int distArraySize)
{
int chosenPathSize = 0;
int pathPos = distArraySize;
@ -426,8 +436,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int idx,
bool useColorCache,
float prevCost,
float[] cost,
ushort[] distArray)
Span<float> cost,
Span<ushort> distArray)
{
double costVal = prevCost;
uint color = bgra[idx];
@ -617,7 +627,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
hashChain.OffsetLength[0] = 0;
Span<uint> hashChainOffsetLength = hashChain.OffsetLength.GetSpan();
hashChainOffsetLength[0] = 0;
for (i = 1; i < pixelCount; i++)
{
int ind;
@ -695,19 +706,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (bestLength <= MinLength)
{
hashChain.OffsetLength[i] = 0;
hashChainOffsetLength[i] = 0;
bestOffsetPrev = 0;
bestLengthPrev = 0;
}
else
{
hashChain.OffsetLength[i] = (uint)((bestOffset << MaxLengthBits) | bestLength);
hashChainOffsetLength[i] = (uint)((bestOffset << MaxLengthBits) | bestLength);
bestOffsetPrev = bestOffset;
bestLengthPrev = bestLength;
}
}
hashChain.OffsetLength[0] = 0;
hashChainOffsetLength[0] = 0;
BackwardReferencesLz77(xSize, ySize, bgra, cacheBits, hashChain, refs);
}

268
src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs

@ -0,0 +1,268 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
internal static class ColorSpaceTransformUtils
{
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector128<byte> CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte();
private static readonly Vector128<byte> CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte();
private static readonly Vector256<byte> CollectColorRedTransformsGreenMask256 = Vector256.Create(0x00ff00).AsByte();
private static readonly Vector256<byte> CollectColorRedTransformsAndMask256 = Vector256.Create((short)0xff).AsByte();
private static readonly Vector128<byte> CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector128<byte> CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
private static readonly Vector128<byte> CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
private static readonly Vector256<byte> CollectColorBlueTransformsShuffleLowMask256 = Vector256.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30, 255, 255, 255, 255, 255, 255, 255, 255);
private static readonly Vector256<byte> CollectColorBlueTransformsShuffleHighMask256 = Vector256.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30);
private static readonly Vector256<byte> CollectColorBlueTransformsGreenBlueMask256 = Vector256.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
private static readonly Vector256<byte> CollectColorBlueTransformsBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector256<byte> CollectColorBlueTransformsGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
#endif
public static void CollectColorBlueTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && tileWidth >= 16)
{
const int span = 16;
Span<ushort> values = stackalloc ushort[span];
var multsr = Vector256.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToBlue));
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector256<byte> input0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector256<byte> input1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector256<byte> r0 = Avx2.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask256);
Vector256<byte> r1 = Avx2.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask256);
Vector256<byte> r = Avx2.Or(r0, r1);
Vector256<byte> gb0 = Avx2.And(input0, CollectColorBlueTransformsGreenBlueMask256);
Vector256<byte> gb1 = Avx2.And(input1, CollectColorBlueTransformsGreenBlueMask256);
Vector256<ushort> gb = Avx2.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector256<byte> g = Avx2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask256);
Vector256<short> a = Avx2.MultiplyHigh(r.AsInt16(), multsr);
Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg);
Vector256<byte> c = Avx2.Subtract(gb.AsByte(), b.AsByte());
Vector256<byte> d = Avx2.Subtract(c, a.AsByte());
Vector256<byte> e = Avx2.And(d, CollectColorBlueTransformsBlueMask256);
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector256<ushort>>(ref outputRef) = e.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
}
}
else if (Sse41.IsSupported)
{
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector128<byte> input0 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector128<byte> input1 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector128<byte> r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
Vector128<byte> r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
Vector128<byte> r = Sse2.Or(r0, r1);
Vector128<byte> gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
Vector128<byte> gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector128<byte> g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask);
Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128<byte> d = Sse2.Subtract(c, a.AsByte());
Vector128<byte> e = Sse2.And(d, CollectColorBlueTransformsBlueMask);
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector128<ushort>>(ref outputRef) = e.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
}
}
else
#endif
{
CollectColorBlueTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
}
}
private static void CollectColorBlueTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{
int pos = 0;
while (tileHeight-- > 0)
{
for (int x = 0; x < tileWidth; x++)
{
int idx = LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, bgra[pos + x]);
++histo[idx];
}
pos += stride;
}
}
public static void CollectColorRedTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && tileWidth >= 16)
{
var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToRed));
const int span = 16;
Span<ushort> values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector256<byte> input0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector256<byte> input1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector256<byte> g0 = Avx2.And(input0, CollectColorRedTransformsGreenMask256); // 0 0 | g 0
Vector256<byte> g1 = Avx2.And(input1, CollectColorRedTransformsGreenMask256);
Vector256<ushort> g = Avx2.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector256<int> a0 = Avx2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector256<int> a1 = Avx2.ShiftRightLogical(input1.AsInt32(), 16);
Vector256<ushort> a = Avx2.PackUnsignedSaturate(a0, a1); // x r
Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector256<byte> c = Avx2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector256<byte> d = Avx2.And(c, CollectColorRedTransformsAndMask256); // 0 r'
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector256<ushort>>(ref outputRef) = d.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
}
}
else if (Sse41.IsSupported)
{
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector128<byte> input0 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector128<byte> input1 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector128<byte> g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
Vector128<byte> g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector128<byte> d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector128<ushort>>(ref outputRef) = d.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
}
}
else
#endif
{
CollectColorRedTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToRed, histo);
}
}
private static void CollectColorRedTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{
int pos = 0;
while (tileHeight-- > 0)
{
for (int x = 0; x < tileWidth; x++)
{
int idx = LosslessUtils.TransformColorRed((sbyte)greenToRed, bgra[pos + x]);
++histo[idx];
}
pos += stride;
}
}
}
}

74
src/ImageSharp/Formats/Webp/Lossless/CostManager.cs

@ -1,7 +1,10 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Buffers;
using System.Collections.Generic;
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
@ -10,20 +13,29 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// It caches the different CostCacheInterval, caches the different
/// GetLengthCost(costModel, k) in costCache and the CostInterval's.
/// </summary>
internal class CostManager
internal sealed class CostManager : IDisposable
{
private CostInterval head;
public CostManager(ushort[] distArray, int pixCount, CostModel costModel)
private const int FreeIntervalsStartCount = 25;
private readonly Stack<CostInterval> freeIntervals = new(FreeIntervalsStartCount);
public CostManager(MemoryAllocator memoryAllocator, IMemoryOwner<ushort> distArray, int pixCount, CostModel costModel)
{
int costCacheSize = pixCount > BackwardReferenceEncoder.MaxLength ? BackwardReferenceEncoder.MaxLength : pixCount;
this.CacheIntervals = new List<CostCacheInterval>();
this.CostCache = new List<double>();
this.Costs = new float[pixCount];
this.Costs = memoryAllocator.Allocate<float>(pixCount);
this.DistArray = distArray;
this.Count = 0;
for (int i = 0; i < FreeIntervalsStartCount; i++)
{
this.freeIntervals.Push(new CostInterval());
}
// Fill in the cost cache.
this.CacheIntervalsSize++;
this.CostCache.Add(costModel.GetLengthCost(0));
@ -64,10 +76,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
// Set the initial costs high for every pixel as we will keep the minimum.
for (int i = 0; i < pixCount; i++)
{
this.Costs[i] = 1e38f;
}
this.Costs.GetSpan().Fill(1e38f);
}
/// <summary>
@ -82,9 +91,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public int CacheIntervalsSize { get; }
public float[] Costs { get; }
public IMemoryOwner<float> Costs { get; }
public ushort[] DistArray { get; }
public IMemoryOwner<ushort> DistArray { get; }
public List<CostCacheInterval> CacheIntervals { get; }
@ -128,6 +137,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// interval logic, just serialize it right away. This constant is empirical.
int skipDistance = 10;
Span<float> costs = this.Costs.GetSpan();
Span<ushort> distArray = this.DistArray.GetSpan();
if (len < skipDistance)
{
for (int j = position; j < position + len; j++)
@ -135,10 +146,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int k = j - position;
float costTmp = (float)(distanceCost + this.CostCache[k]);
if (this.Costs[j] > costTmp)
if (costs[j] > costTmp)
{
this.Costs[j] = costTmp;
this.DistArray[j] = (ushort)(k + 1);
costs[j] = costTmp;
distArray[j] = (ushort)(k + 1);
}
}
@ -201,10 +212,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.InsertInterval(interval, interval.Cost, interval.Index, end, endOriginal);
break;
}
else
{
interval.End = start;
}
interval.End = start;
}
}
@ -226,6 +235,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.ConnectIntervals(interval.Previous, interval.Next);
this.Count--;
interval.Next = null;
interval.Previous = null;
this.freeIntervals.Push(interval);
}
private void InsertInterval(CostInterval intervalIn, float cost, int position, int start, int end)
@ -236,13 +249,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
// TODO: should we use COST_CACHE_INTERVAL_SIZE_MAX?
var intervalNew = new CostInterval()
CostInterval intervalNew;
if (this.freeIntervals.Count > 0)
{
Cost = cost,
Start = start,
End = end,
Index = position
};
intervalNew = this.freeIntervals.Pop();
intervalNew.Cost = cost;
intervalNew.Start = start;
intervalNew.End = end;
intervalNew.Index = position;
}
else
{
intervalNew = new CostInterval() { Cost = cost, Start = start, End = end, Index = position };
}
this.PositionOrphanInterval(intervalNew, intervalIn);
this.Count++;
@ -297,12 +316,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary>
private void UpdateCost(int i, int position, float cost)
{
Span<float> costs = this.Costs.GetSpan();
Span<ushort> distArray = this.DistArray.GetSpan();
int k = i - position;
if (this.Costs[i] > cost)
if (costs[i] > cost)
{
this.Costs[i] = cost;
this.DistArray[i] = (ushort)(k + 1);
costs[i] = cost;
distArray[i] = (ushort)(k + 1);
}
}
/// <inheritdoc />
public void Dispose() => this.Costs.Dispose();
}
}

2
src/ImageSharp/Formats/Webp/Lossless/CostModel.cs

@ -87,7 +87,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (nonzeros <= 1)
{
output.AsSpan(0, numSymbols).Fill(0);
output.AsSpan(0, numSymbols).Clear();
}
else
{

10
src/ImageSharp/Formats/Webp/Lossless/HTreeGroup.cs

@ -13,16 +13,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// - UsePackedTable: few enough literal symbols, so all the bit codes can fit into a small look-up table PackedTable[]
/// The common literal base, if applicable, is stored in 'LiteralArb'.
/// </summary>
internal class HTreeGroup
internal struct HTreeGroup
{
public HTreeGroup(uint packedTableSize)
{
this.HTrees = new List<HuffmanCode[]>(WebpConstants.HuffmanCodesPerMetaCode);
this.PackedTable = new HuffmanCode[packedTableSize];
for (int i = 0; i < packedTableSize; i++)
{
this.PackedTable[i] = new HuffmanCode();
}
this.IsTrivialCode = false;
this.IsTrivialLiteral = false;
this.LiteralArb = 0;
this.UsePackedTable = false;
}
/// <summary>

2
src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs

@ -287,7 +287,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Create a mapping from a cluster id to its minimal version.
int clusterMax = 0;
clusterMappingsTmp.AsSpan().Fill(0);
clusterMappingsTmp.AsSpan().Clear();
// Re-map the ids.
for (int i = 0; i < symbols.Length; i++)

2
src/ImageSharp/Formats/Webp/Lossless/HuffmanCode.cs

@ -9,7 +9,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// A classic way to do entropy coding where a smaller number of bits are used for more frequent codes.
/// </summary>
[DebuggerDisplay("BitsUsed: {BitsUsed}, Value: {Value}")]
internal class HuffmanCode
internal struct HuffmanCode
{
/// <summary>
/// Gets or sets the number of bits used for this symbol.

4
src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs

@ -9,7 +9,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Represents the Huffman tree.
/// </summary>
[DebuggerDisplay("TotalCount = {TotalCount}, Value = {Value}, Left = {PoolIndexLeft}, Right = {PoolIndexRight}")]
internal struct HuffmanTree : IDeepCloneable
internal struct HuffmanTree
{
/// <summary>
/// Initializes a new instance of the <see cref="HuffmanTree"/> struct.
@ -57,7 +57,5 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return t1.Value < t2.Value ? -1 : 1;
}
public IDeepCloneable DeepClone() => new HuffmanTree(this);
}
}

53
src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs

@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
@ -28,7 +29,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static void CreateHuffmanTree(uint[] histogram, int treeDepthLimit, bool[] bufRle, HuffmanTree[] huffTree, HuffmanTreeCode huffCode)
{
int numSymbols = huffCode.NumSymbols;
bufRle.AsSpan().Fill(false);
bufRle.AsSpan().Clear();
OptimizeHuffmanForRle(numSymbols, bufRle, histogram);
GenerateOptimalTree(huffTree, histogram, numSymbols, treeDepthLimit, huffCode.CodeLengths);
@ -218,8 +219,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
while (treeSize > 1)
{
// Finish when we have only one root.
treePool[treePoolSize++] = (HuffmanTree)tree[treeSize - 1].DeepClone();
treePool[treePoolSize++] = (HuffmanTree)tree[treeSize - 2].DeepClone();
treePool[treePoolSize++] = tree[treeSize - 1];
treePool[treePoolSize++] = tree[treeSize - 2];
int count = treePool[treePoolSize - 1].TotalCount + treePool[treePoolSize - 2].TotalCount;
treeSize -= 2;
@ -238,7 +239,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int startIdx = endIdx + num - 1;
for (int i = startIdx; i >= endIdx; i--)
{
tree[i] = (HuffmanTree)tree[i - 1].DeepClone();
tree[i] = tree[i - 1];
}
tree[k].TotalCount = count;
@ -307,9 +308,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static int BuildHuffmanTable(Span<HuffmanCode> table, int rootBits, int[] codeLengths, int codeLengthsSize)
{
Guard.MustBeGreaterThan(rootBits, 0, nameof(rootBits));
Guard.NotNull(codeLengths, nameof(codeLengths));
Guard.MustBeGreaterThan(codeLengthsSize, 0, nameof(codeLengthsSize));
DebugGuard.MustBeGreaterThan(rootBits, 0, nameof(rootBits));
DebugGuard.NotNull(codeLengths, nameof(codeLengths));
DebugGuard.MustBeGreaterThan(codeLengthsSize, 0, nameof(codeLengthsSize));
// sorted[codeLengthsSize] is a pre-allocated array for sorting symbols by code length.
int[] sorted = new int[codeLengthsSize];
@ -467,27 +468,27 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
break;
}
else if (repetitions < 11)
if (repetitions < 11)
{
tokens[pos].Code = 17;
tokens[pos].ExtraBits = (byte)(repetitions - 3);
pos++;
break;
}
else if (repetitions < 139)
if (repetitions < 139)
{
tokens[pos].Code = 18;
tokens[pos].ExtraBits = (byte)(repetitions - 11);
pos++;
break;
}
else
{
tokens[pos].Code = 18;
tokens[pos].ExtraBits = 0x7f; // 138 repeated 0s
pos++;
repetitions -= 138;
}
tokens[pos].Code = 18;
tokens[pos].ExtraBits = 0x7f; // 138 repeated 0s
pos++;
repetitions -= 138;
}
return pos;
@ -519,20 +520,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
break;
}
else if (repetitions < 7)
if (repetitions < 7)
{
tokens[pos].Code = 16;
tokens[pos].ExtraBits = (byte)(repetitions - 3);
pos++;
break;
}
else
{
tokens[pos].Code = 16;
tokens[pos].ExtraBits = 3;
pos++;
repetitions -= 6;
}
tokens[pos].Code = 16;
tokens[pos].ExtraBits = 3;
pos++;
repetitions -= 6;
}
return pos;
@ -541,7 +541,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// <summary>
/// Get the actual bit values for a tree of bit depths.
/// </summary>
/// <param name="tree">The hiffman tree.</param>
/// <param name="tree">The huffman tree.</param>
private static void ConvertBitDepthsToSymbols(HuffmanTreeCode tree)
{
// 0 bit-depth means that the symbol does not exist.
@ -628,7 +628,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary>
private static void ReplicateValue(Span<HuffmanCode> table, int step, int end, HuffmanCode code)
{
Guard.IsTrue(end % step == 0, nameof(end), "end must be a multiple of step");
DebugGuard.IsTrue(end % step == 0, nameof(end), "end must be a multiple of step");
do
{
@ -656,6 +656,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// <summary>
/// Heuristics for selecting the stride ranges to collapse.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static bool ValuesShouldBeCollapsedToStrideAverage(int a, int b) => Math.Abs(a - b) < 4;
}
}

341
src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs

@ -42,12 +42,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static readonly Vector128<byte> TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector256<byte> TransformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector128<byte> TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector256<byte> TransformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
private static readonly Vector128<byte> TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector256<byte> TransformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
#endif
@ -122,76 +128,67 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (Avx2.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 8; i += 8)
{
int i;
for (i = 0; i + 8 <= numPixels; i += 8)
{
uint* idx = p + i;
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Add(input, in0g0g);
Avx.Store((byte*)idx, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector256<byte> input = Unsafe.As<uint, Vector256<uint>>(ref pos).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Add(input, in0g0g);
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Ssse3.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{
int i;
for (i = 0; i + 4 <= numPixels; i += 4)
{
uint* idx = p + i;
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Add(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte());
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Add(input, in0g0g);
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Sse2.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{
int i;
for (i = 0; i + 4 <= numPixels; i += 4)
{
uint* idx = p + i;
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx);
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte());
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else
#endif
{
AddGreenToBlueAndRedNoneVectorized(pixelData);
AddGreenToBlueAndRedScalar(pixelData);
}
}
private static void AddGreenToBlueAndRedNoneVectorized(Span<uint> pixelData)
private static void AddGreenToBlueAndRedScalar(Span<uint> pixelData)
{
int numPixels = pixelData.Length;
for (int i = 0; i < numPixels; i++)
@ -211,76 +208,67 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (Avx2.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 8; i += 8)
{
int i;
for (i = 0; i + 8 <= numPixels; i += 8)
{
uint* idx = p + i;
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Subtract(input, in0g0g);
Avx.Store((byte*)idx, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector256<byte> input = Unsafe.As<uint, Vector256<uint>>(ref pos).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Subtract(input, in0g0g);
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Ssse3.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{
int i;
for (i = 0; i + 4 <= numPixels; i += 4)
{
uint* idx = p + i;
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Subtract(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte());
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Subtract(input, in0g0g);
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Sse2.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{
int i;
for (i = 0; i + 4 <= numPixels; i += 4)
{
uint* idx = p + i;
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx);
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte());
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else
#endif
{
SubtractGreenFromBlueAndRedNoneVectorized(pixelData);
SubtractGreenFromBlueAndRedScalar(pixelData);
}
}
private static void SubtractGreenFromBlueAndRedNoneVectorized(Span<uint> pixelData)
private static void SubtractGreenFromBlueAndRedScalar(Span<uint> pixelData)
{
int numPixels = pixelData.Length;
for (int i = 0; i < numPixels; i++)
@ -403,49 +391,74 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Color transform keeps the green (G) value as it is, transforms red (R) based on green and transforms blue (B) based on green and then based on red.
/// </summary>
/// <param name="m">The Vp8LMultipliers.</param>
/// <param name="data">The pixel data to transform.</param>
/// <param name="pixelData">The pixel data to transform.</param>
/// <param name="numPixels">The number of pixels to process.</param>
public static void TransformColor(Vp8LMultipliers m, Span<uint> data, int numPixels)
public static void TransformColor(Vp8LMultipliers m, Span<uint> pixelData, int numPixels)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
if (Avx2.IsSupported && numPixels >= 8)
{
Vector256<int> multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector256<int> multsb2 = MkCst32(Cst5b(m.RedToBlue), 0);
nint idx;
for (idx = 0; idx <= numPixels - 8; idx += 8)
{
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
Vector256<uint> input = Unsafe.As<uint, Vector256<uint>>(ref pos);
Vector256<byte> a = Avx2.And(input.AsByte(), TransformColorAlphaGreenMask256);
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector256<short> d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector256<short> e = Avx2.ShiftLeftLogical(input.AsInt16(), 8);
Vector256<short> f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector256<int> g = Avx2.ShiftRightLogical(f.AsInt32(), 16);
Vector256<byte> h = Avx2.Add(g.AsByte(), d.AsByte());
Vector256<byte> i = Avx2.And(h, TransformColorRedBlueMask256);
Vector256<byte> output = Avx2.Subtract(input.AsByte(), i);
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
}
if (idx != numPixels)
{
TransformColorScalar(m, pixelData.Slice((int)idx), numPixels - (int)idx);
}
}
else if (Sse2.IsSupported)
{
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
fixed (uint* src = data)
nint idx;
for (idx = 0; idx <= numPixels - 4; idx += 4)
{
int idx;
for (idx = 0; idx + 4 <= numPixels; idx += 4)
{
uint* pos = src + idx;
Vector128<uint> input = Sse2.LoadVector128(pos);
Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8);
Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector128<int> g = Sse2.ShiftRightLogical(f.AsInt32(), 16);
Vector128<byte> h = Sse2.Add(g.AsByte(), d.AsByte());
Vector128<byte> i = Sse2.And(h, TransformColorRedBlueMask);
Vector128<byte> output = Sse2.Subtract(input.AsByte(), i);
Sse2.Store((byte*)pos, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
Vector128<uint> input = Unsafe.As<uint, Vector128<uint>>(ref pos);
Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8);
Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector128<int> g = Sse2.ShiftRightLogical(f.AsInt32(), 16);
Vector128<byte> h = Sse2.Add(g.AsByte(), d.AsByte());
Vector128<byte> i = Sse2.And(h, TransformColorRedBlueMask);
Vector128<byte> output = Sse2.Subtract(input.AsByte(), i);
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (idx != numPixels)
{
TransformColorNoneVectorized(m, data.Slice(idx), numPixels - idx);
}
if (idx != numPixels)
{
TransformColorScalar(m, pixelData.Slice((int)idx), numPixels - (int)idx);
}
}
else
#endif
{
TransformColorNoneVectorized(m, data, numPixels);
TransformColorScalar(m, pixelData, numPixels);
}
}
private static void TransformColorNoneVectorized(Vp8LMultipliers m, Span<uint> data, int numPixels)
private static void TransformColorScalar(Vp8LMultipliers m, Span<uint> data, int numPixels)
{
for (int i = 0; i < numPixels; i++)
{
@ -471,45 +484,71 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static void TransformColorInverse(Vp8LMultipliers m, Span<uint> pixelData)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
if (Avx2.IsSupported && pixelData.Length >= 8)
{
Vector256<int> multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector256<int> multsb2 = MkCst32(Cst5b(m.RedToBlue), 0);
nint idx;
for (idx = 0; idx <= pixelData.Length - 8; idx += 8)
{
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
Vector256<uint> input = Unsafe.As<uint, Vector256<uint>>(ref pos);
Vector256<byte> a = Avx2.And(input.AsByte(), TransformColorInverseAlphaGreenMask256);
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector256<short> d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector256<byte> e = Avx2.Add(input.AsByte(), d.AsByte());
Vector256<short> f = Avx2.ShiftLeftLogical(e.AsInt16(), 8);
Vector256<short> g = Avx2.MultiplyHigh(f, multsb2.AsInt16());
Vector256<int> h = Avx2.ShiftRightLogical(g.AsInt32(), 8);
Vector256<byte> i = Avx2.Add(h.AsByte(), f.AsByte());
Vector256<short> j = Avx2.ShiftRightLogical(i.AsInt16(), 8);
Vector256<byte> output = Avx2.Or(j.AsByte(), a);
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
}
if (idx != pixelData.Length)
{
TransformColorInverseScalar(m, pixelData.Slice((int)idx));
}
}
else if (Sse2.IsSupported)
{
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
fixed (uint* src = pixelData)
nint idx;
for (idx = 0; idx <= pixelData.Length - 4; idx += 4)
{
int idx;
for (idx = 0; idx + 4 <= pixelData.Length; idx += 4)
{
uint* pos = src + idx;
Vector128<uint> input = Sse2.LoadVector128(pos);
Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte());
Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8);
Vector128<short> g = Sse2.MultiplyHigh(f, multsb2.AsInt16());
Vector128<int> h = Sse2.ShiftRightLogical(g.AsInt32(), 8);
Vector128<byte> i = Sse2.Add(h.AsByte(), f.AsByte());
Vector128<short> j = Sse2.ShiftRightLogical(i.AsInt16(), 8);
Vector128<byte> output = Sse2.Or(j.AsByte(), a);
Sse2.Store((byte*)pos, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
Vector128<uint> input = Unsafe.As<uint, Vector128<uint>>(ref pos);
Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte());
Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8);
Vector128<short> g = Sse2.MultiplyHigh(f, multsb2.AsInt16());
Vector128<int> h = Sse2.ShiftRightLogical(g.AsInt32(), 8);
Vector128<byte> i = Sse2.Add(h.AsByte(), f.AsByte());
Vector128<short> j = Sse2.ShiftRightLogical(i.AsInt16(), 8);
Vector128<byte> output = Sse2.Or(j.AsByte(), a);
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (idx != pixelData.Length)
{
TransformColorInverseNoneVectorized(m, pixelData.Slice(idx));
}
if (idx != pixelData.Length)
{
TransformColorInverseScalar(m, pixelData.Slice((int)idx));
}
}
else
#endif
{
TransformColorInverseNoneVectorized(m, pixelData);
TransformColorInverseScalar(m, pixelData);
}
}
private static void TransformColorInverseNoneVectorized(Vp8LMultipliers m, Span<uint> pixelData)
private static void TransformColorInverseScalar(Vp8LMultipliers m, Span<uint> pixelData)
{
for (int i = 0; i < pixelData.Length; i++)
{
@ -744,6 +783,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (float)retVal;
}
[MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorRed(sbyte greenToRed, uint argb)
{
sbyte green = U32ToS8(argb >> 8);
@ -752,6 +792,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (byte)(newRed & 0xff);
}
[MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint argb)
{
sbyte green = U32ToS8(argb >> 8);
@ -818,15 +859,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int correction = (int)((23 * (origV & (y - 1))) >> 4);
return (vF * (WebpLookupTables.Log2Table[v] + logCnt)) + correction;
}
else
{
return (float)(Log2Reciprocal * v * Math.Log(v));
}
return (float)(Log2Reciprocal * v * Math.Log(v));
}
private static float FastLog2Slow(uint v)
{
Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
if (v < ApproxLogWithCorrectionMax)
{
int logCnt = 0;
@ -1288,6 +1328,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector128<int> MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff));
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector256<int> MkCst32(int hi, int lo) => Vector256.Create((hi << 16) | (lo & 0xffff));
#endif
private static uint Select(uint a, uint b, uint c, Span<short> scratch)

2
src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs

@ -6,7 +6,7 @@ using System.Diagnostics;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
[DebuggerDisplay("Mode: {Mode}, Len: {Len}, BgraOrDistance: {BgraOrDistance}")]
internal class PixOrCopy
internal sealed class PixOrCopy
{
public PixOrCopyMode Mode { get; set; }

2
src/ImageSharp/Formats/Webp/Lossless/PixOrCopyMode.cs

@ -3,7 +3,7 @@
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
internal enum PixOrCopyMode
internal enum PixOrCopyMode : byte
{
Literal,

185
src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

@ -5,11 +5,6 @@ using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
/// <summary>
@ -34,22 +29,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const int PredLowEffort = 11;
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector128<byte> CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte();
private static readonly Vector128<byte> CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte();
private static readonly Vector128<byte> CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector128<byte> CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
private static readonly Vector128<byte> CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
#endif
// This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<sbyte> DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 };
@ -572,19 +551,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (byte)lower;
}
else
{
// upper is closer to residual than lower.
if (residual <= boundaryResidual && upper > boundaryResidual)
{
// Halve quantization step to avoid crossing boundary. This midpoint is
// on the same side of boundary as residual because midpoint <= residual
// (since upper is closer than lower) and residual is below the boundary.
return (byte)(lower + (quantization >> 1));
}
return (byte)(upper & 0xff);
// upper is closer to residual than lower.
if (residual <= boundaryResidual && upper > boundaryResidual)
{
// Halve quantization step to avoid crossing boundary. This midpoint is
// on the same side of boundary as residual because midpoint <= residual
// (since upper is closer than lower) and residual is below the boundary.
return (byte)(lower + (quantization >> 1));
}
return (byte)upper;
}
/// <summary>
@ -980,7 +957,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span<int> histo = scratch.Slice(0, 256);
histo.Clear();
CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo);
ColorSpaceTransformUtils.CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo);
double curDiff = PredictionCostCrossColor(accumulatedRedHisto, histo);
if ((byte)greenToRed == prevX.GreenToRed)
@ -1018,7 +995,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span<int> histo = scratch.Slice(0, 256);
histo.Clear();
CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
ColorSpaceTransformUtils.CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
double curDiff = PredictionCostCrossColor(accumulatedBlueHisto, histo);
if ((byte)greenToBlue == prevX.GreenToBlue)
{
@ -1057,146 +1034,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return curDiff;
}
private static void CollectColorRedTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
{
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* src = srcSpan)
fixed (ushort* dst = values)
{
for (int x = 0; x + span <= tileWidth; x += span)
{
uint* input0Idx = src + x;
uint* input1Idx = src + x + (span / 2);
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
Vector128<byte> g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
Vector128<byte> g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector128<byte> d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
Sse2.Store(dst, d.AsUInt16());
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
}
#pragma warning restore SA1503 // Braces should not be omitted
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
}
}
else
#endif
{
CollectColorRedTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToRed, histo);
}
}
private static void CollectColorRedTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{
int pos = 0;
while (tileHeight-- > 0)
{
for (int x = 0; x < tileWidth; x++)
{
int idx = LosslessUtils.TransformColorRed((sbyte)greenToRed, bgra[pos + x]);
++histo[idx];
}
pos += stride;
}
}
private static void CollectColorBlueTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
{
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* src = srcSpan)
fixed (ushort* dst = values)
{
for (int x = 0; x + span <= tileWidth; x += span)
{
uint* input0Idx = src + x;
uint* input1Idx = src + x + (span / 2);
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
Vector128<byte> r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
Vector128<byte> r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
Vector128<byte> r = Sse2.Or(r0, r1);
Vector128<byte> gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
Vector128<byte> gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector128<byte> g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask);
Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128<byte> d = Sse2.Subtract(c, a.AsByte());
Vector128<byte> e = Sse2.And(d, CollectColorBlueTransformsBlueMask);
Sse2.Store(dst, e.AsUInt16());
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
}
#pragma warning restore SA1503 // Braces should not be omitted
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
}
}
else
#endif
{
CollectColorBlueTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
}
}
private static void CollectColorBlueTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{
int pos = 0;
while (tileHeight-- > 0)
{
for (int x = 0; x < tileWidth; x++)
{
int idx = LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, bgra[pos + x]);
++histo[idx];
}
pos += stride;
}
}
private static float PredictionCostSpatialHistogram(int[][] accumulated, int[][] tile)
{
double retVal = 0.0d;

2
src/ImageSharp/Formats/Webp/Lossless/Vp8LBackwardRefs.cs

@ -7,7 +7,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
internal class Vp8LBackwardRefs
{
public Vp8LBackwardRefs() => this.Refs = new List<PixOrCopy>();
public Vp8LBackwardRefs(int pixels) => this.Refs = new List<PixOrCopy>(pixels);
/// <summary>
/// Gets or sets the common block-size.

26
src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs

@ -124,19 +124,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.EncodedData = memoryAllocator.Allocate<uint>(pixelCount);
this.Palette = memoryAllocator.Allocate<uint>(WebpConstants.MaxPaletteSize);
this.Refs = new Vp8LBackwardRefs[3];
this.HashChain = new Vp8LHashChain(pixelCount);
this.HashChain = new Vp8LHashChain(memoryAllocator, pixelCount);
// We round the block size up, so we're guaranteed to have at most MaxRefsBlockPerImage blocks used:
int refsBlockSize = ((pixelCount - 1) / MaxRefsBlockPerImage) + 1;
for (int i = 0; i < this.Refs.Length; i++)
{
this.Refs[i] = new Vp8LBackwardRefs
this.Refs[i] = new Vp8LBackwardRefs(pixelCount)
{
BlockSize = refsBlockSize < MinBlockSize ? MinBlockSize : refsBlockSize
};
}
}
// RFC 1951 will calm you down if you are worried about this funny sequence.
// This sequence is tuned from that, but more weighted for lower symbol count,
// and more spiking histograms.
// This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<byte> StorageOrder => new byte[] { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
// This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<byte> Order => new byte[] { 1, 2, 0, 3 };
@ -516,7 +522,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
// Calculate backward references from BGRA image.
this.HashChain.Fill(this.memoryAllocator, bgra, this.quality, width, height, lowEffort);
this.HashChain.Fill(bgra, this.quality, width, height, lowEffort);
Vp8LBitWriter bitWriterBest = config.SubConfigs.Count > 1 ? this.bitWriter.Clone() : this.bitWriter;
Vp8LBitWriter bwInit = this.bitWriter;
@ -530,6 +536,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.quality,
subConfig.Lz77,
ref cacheBits,
this.memoryAllocator,
this.HashChain,
this.Refs[0],
this.Refs[1]);
@ -736,7 +743,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
// Calculate backward references from the image pixels.
hashChain.Fill(this.memoryAllocator, bgra, quality, width, height, lowEffort);
hashChain.Fill(bgra, quality, width, height, lowEffort);
Vp8LBackwardRefs refs = BackwardReferenceEncoder.GetBackwardReferences(
width,
@ -745,6 +752,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
quality,
(int)Vp8LLz77Type.Lz77Standard | (int)Vp8LLz77Type.Lz77Rle,
ref cacheBits,
this.memoryAllocator,
hashChain,
refsTmp1,
refsTmp2);
@ -941,16 +949,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private void StoreHuffmanTreeOfHuffmanTreeToBitMask(byte[] codeLengthBitDepth)
{
// RFC 1951 will calm you down if you are worried about this funny sequence.
// This sequence is tuned from that, but more weighted for lower symbol count,
// and more spiking histograms.
byte[] storageOrder = { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
// Throw away trailing zeros:
int codesToStore = WebpConstants.CodeLengthCodes;
for (; codesToStore > 4; codesToStore--)
{
if (codeLengthBitDepth[storageOrder[codesToStore - 1]] != 0)
if (codeLengthBitDepth[StorageOrder[codesToStore - 1]] != 0)
{
break;
}
@ -959,7 +962,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.bitWriter.PutBits((uint)codesToStore - 4, 4);
for (int i = 0; i < codesToStore; i++)
{
this.bitWriter.PutBits(codeLengthBitDepth[storageOrder[i]], 3);
this.bitWriter.PutBits(codeLengthBitDepth[StorageOrder[i]], 3);
}
}
@ -1803,6 +1806,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.BgraScratch.Dispose();
this.Palette.Dispose();
this.TransformData.Dispose();
this.HashChain.Dispose();
}
}
}

40
src/ImageSharp/Formats/Webp/Lossless/Vp8LHashChain.cs

@ -8,7 +8,7 @@ using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
internal class Vp8LHashChain
internal sealed class Vp8LHashChain : IDisposable
{
private const uint HashMultiplierHi = 0xc6a4a793u;
@ -28,14 +28,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary>
private const int WindowSize = (1 << WindowSizeBits) - 120;
private readonly MemoryAllocator memoryAllocator;
/// <summary>
/// Initializes a new instance of the <see cref="Vp8LHashChain"/> class.
/// </summary>
/// <param name="memoryAllocator">The memory allocator.</param>
/// <param name="size">The size off the chain.</param>
public Vp8LHashChain(int size)
public Vp8LHashChain(MemoryAllocator memoryAllocator, int size)
{
this.OffsetLength = new uint[size];
this.OffsetLength.AsSpan().Fill(0xcdcdcdcd);
this.memoryAllocator = memoryAllocator;
this.OffsetLength = this.memoryAllocator.Allocate<uint>(size, AllocationOptions.Clean);
this.Size = size;
}
@ -45,16 +48,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// These 20 bits are the limit defined by GetWindowSizeForHashChain (through WindowSize = 1 &lt;&lt; 20).
/// The lower 12 bits contain the length of the match.
/// </summary>
public uint[] OffsetLength { get; }
public IMemoryOwner<uint> OffsetLength { get; }
/// <summary>
/// Gets the size of the hash chain.
/// This is the maximum size of the hash_chain that can be constructed.
/// This is the maximum size of the hashchain that can be constructed.
/// Typically this is the pixel count (width x height) for a given image.
/// </summary>
public int Size { get; }
public void Fill(MemoryAllocator memoryAllocator, ReadOnlySpan<uint> bgra, int quality, int xSize, int ySize, bool lowEffort)
public void Fill(ReadOnlySpan<uint> bgra, int quality, int xSize, int ySize, bool lowEffort)
{
int size = xSize * ySize;
int iterMax = GetMaxItersForQuality(quality);
@ -63,20 +66,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (size <= 2)
{
this.OffsetLength[0] = 0;
this.OffsetLength.GetSpan()[0] = 0;
return;
}
using IMemoryOwner<int> hashToFirstIndexBuffer = memoryAllocator.Allocate<int>(HashSize);
using IMemoryOwner<int> hashToFirstIndexBuffer = this.memoryAllocator.Allocate<int>(HashSize);
using IMemoryOwner<int> chainBuffer = this.memoryAllocator.Allocate<int>(size, AllocationOptions.Clean);
Span<int> hashToFirstIndex = hashToFirstIndexBuffer.GetSpan();
Span<int> chain = chainBuffer.GetSpan();
// Initialize hashToFirstIndex array to -1.
hashToFirstIndex.Fill(-1);
int[] chain = new int[size];
// Fill the chain linking pixels with the same hash.
bool bgraComp = bgra.Length > 1 && bgra[0] == bgra[1];
Span<uint> tmp = stackalloc uint[2];
for (pos = 0; pos < size - 2;)
{
uint hashCode;
@ -85,7 +89,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
// Consecutive pixels with the same color will share the same hash.
// We therefore use a different hash: the color and its repetition length.
uint[] tmp = new uint[2];
tmp.Clear();
uint len = 1;
tmp[0] = bgra[pos];
@ -134,7 +138,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Find the best match interval at each pixel, defined by an offset to the
// pixel and a length. The right-most pixel cannot match anything to the right
// (hence a best length of 0) and the left-most pixel nothing to the left (hence an offset of 0).
this.OffsetLength[0] = this.OffsetLength[size - 1] = 0;
Span<uint> offsetLength = this.OffsetLength.GetSpan();
offsetLength[0] = offsetLength[size - 1] = 0;
for (int basePosition = size - 2; basePosition > 0;)
{
int maxLen = LosslessUtils.MaxFindCopyLength(size - 1 - basePosition);
@ -208,7 +213,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint maxBasePosition = (uint)basePosition;
while (true)
{
this.OffsetLength[basePosition] = (bestDistance << BackwardReferenceEncoder.MaxLengthBits) | (uint)bestLength;
offsetLength[basePosition] = (bestDistance << BackwardReferenceEncoder.MaxLengthBits) | (uint)bestLength;
--basePosition;
// Stop if we don't have a match or if we are out of bounds.
@ -242,10 +247,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
[MethodImpl(InliningOptions.ShortMethod)]
public int FindLength(int basePosition) => (int)(this.OffsetLength[basePosition] & ((1U << BackwardReferenceEncoder.MaxLengthBits) - 1));
public int FindLength(int basePosition) => (int)(this.OffsetLength.GetSpan()[basePosition] & ((1U << BackwardReferenceEncoder.MaxLengthBits) - 1));
[MethodImpl(InliningOptions.ShortMethod)]
public int FindOffset(int basePosition) => (int)(this.OffsetLength[basePosition] >> BackwardReferenceEncoder.MaxLengthBits);
public int FindOffset(int basePosition) => (int)(this.OffsetLength.GetSpan()[basePosition] >> BackwardReferenceEncoder.MaxLengthBits);
/// <summary>
/// Calculates the hash for a pixel pair.
@ -280,5 +285,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return maxWindowSize > WindowSize ? WindowSize : maxWindowSize;
}
/// <inheritdoc />
public void Dispose() => this.OffsetLength.Dispose();
}
}

10
src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs

@ -320,7 +320,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
else
{
output.Literal.AsSpan(0, literalSize).Fill(0);
output.Literal.AsSpan(0, literalSize).Clear();
}
}
@ -343,7 +343,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
else
{
output.Red.AsSpan(0, size).Fill(0);
output.Red.AsSpan(0, size).Clear();
}
}
@ -366,7 +366,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
else
{
output.Blue.AsSpan(0, size).Fill(0);
output.Blue.AsSpan(0, size).Clear();
}
}
@ -389,7 +389,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
else
{
output.Alpha.AsSpan(0, size).Fill(0);
output.Alpha.AsSpan(0, size).Clear();
}
}
@ -412,7 +412,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
else
{
output.Distance.AsSpan(0, size).Fill(0);
output.Distance.AsSpan(0, size).Clear();
}
}

27
src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs

@ -65,15 +65,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
FixedTableSize + 2704
};
private static readonly byte[] CodeLengthCodeOrder = { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
private static readonly int NumCodeLengthCodes = CodeLengthCodeOrder.Length;
private static readonly byte[] LiteralMap =
{
0, 1, 1, 1, 0
};
/// <summary>
/// Initializes a new instance of the <see cref="WebpLosslessDecoder"/> class.
/// </summary>
@ -87,6 +80,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.configuration = configuration;
}
// This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<byte> CodeLengthCodeOrder => new byte[] { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
// This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<byte> LiteralMap => new byte[] { 0, 1, 1, 1, 0 };
/// <summary>
/// Decodes the image from the stream using the bitreader.
/// </summary>
@ -834,10 +833,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private void BuildPackedTable(HTreeGroup hTreeGroup)
{
for (uint code = 0; code < HuffmanUtils.HuffmanPackedTableSize; ++code)
for (uint code = 0; code < HuffmanUtils.HuffmanPackedTableSize; code++)
{
uint bits = code;
HuffmanCode huff = hTreeGroup.PackedTable[bits];
ref HuffmanCode huff = ref hTreeGroup.PackedTable[bits];
HuffmanCode hCode = hTreeGroup.HTrees[HuffIndex.Green][bits];
if (hCode.Value >= WebpConstants.NumLiteralCodes)
{
@ -848,10 +847,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
huff.BitsUsed = 0;
huff.Value = 0;
bits >>= AccumulateHCode(hCode, 8, huff);
bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Red][bits], 16, huff);
bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Blue][bits], 0, huff);
bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Alpha][bits], 24, huff);
bits >>= AccumulateHCode(hCode, 8, ref huff);
bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Red][bits], 16, ref huff);
bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Blue][bits], 0, ref huff);
bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Alpha][bits], 24, ref huff);
}
}
}
@ -992,7 +991,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
[MethodImpl(InliningOptions.ShortMethod)]
private static int AccumulateHCode(HuffmanCode hCode, int shift, HuffmanCode huff)
private static int AccumulateHCode(HuffmanCode hCode, int shift, ref HuffmanCode huff)
{
huff.BitsUsed += hCode.BitsUsed;
huff.Value |= hCode.Value << shift;

61
src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs

@ -704,28 +704,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// a20 a21 a22 a23 b20 b21 b22 b23
// a30 a31 a32 a33 b30 b31 b32 b33
// Transpose the two 4x4.
Vector128<short> transpose00 = Sse2.UnpackLow(b0, b1);
Vector128<short> transpose01 = Sse2.UnpackLow(b2, b3);
Vector128<short> transpose02 = Sse2.UnpackHigh(b0, b1);
Vector128<short> transpose03 = Sse2.UnpackHigh(b2, b3);
// a00 a10 a01 a11 a02 a12 a03 a13
// a20 a30 a21 a31 a22 a32 a23 a33
// b00 b10 b01 b11 b02 b12 b03 b13
// b20 b30 b21 b31 b22 b32 b23 b33
Vector128<int> transpose10 = Sse2.UnpackLow(transpose00.AsInt32(), transpose01.AsInt32());
Vector128<int> transpose11 = Sse2.UnpackLow(transpose02.AsInt32(), transpose03.AsInt32());
Vector128<int> transpose12 = Sse2.UnpackHigh(transpose00.AsInt32(), transpose01.AsInt32());
Vector128<int> transpose13 = Sse2.UnpackHigh(transpose02.AsInt32(), transpose03.AsInt32());
// a00 a10 a20 a30 a01 a11 a21 a31
// b00 b10 b20 b30 b01 b11 b21 b31
// a02 a12 a22 a32 a03 a13 a23 a33
// b02 b12 a22 b32 b03 b13 b23 b33
Vector128<long> output0 = Sse2.UnpackLow(transpose10.AsInt64(), transpose11.AsInt64());
Vector128<long> output1 = Sse2.UnpackHigh(transpose10.AsInt64(), transpose11.AsInt64());
Vector128<long> output2 = Sse2.UnpackLow(transpose12.AsInt64(), transpose13.AsInt64());
Vector128<long> output3 = Sse2.UnpackHigh(transpose12.AsInt64(), transpose13.AsInt64());
Vp8Transpose_2_4x4_16b(b0, b1, b2, b3, out Vector128<long> output0, out Vector128<long> output1, out Vector128<long> output2, out Vector128<long> output3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
@ -769,6 +748,44 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return Numerics.ReduceSum(result);
}
// Transpose two 4x4 16b matrices horizontally stored in registers.
[MethodImpl(InliningOptions.ShortMethod)]
public static void Vp8Transpose_2_4x4_16b(Vector128<short> b0, Vector128<short> b1, Vector128<short> b2, Vector128<short> b3, out Vector128<long> output0, out Vector128<long> output1, out Vector128<long> output2, out Vector128<long> output3)
{
// Transpose the two 4x4.
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
// a30 a31 a32 a33 b30 b31 b32 b33
Vector128<short> transpose00 = Sse2.UnpackLow(b0, b1);
Vector128<short> transpose01 = Sse2.UnpackLow(b2, b3);
Vector128<short> transpose02 = Sse2.UnpackHigh(b0, b1);
Vector128<short> transpose03 = Sse2.UnpackHigh(b2, b3);
// a00 a10 a01 a11 a02 a12 a03 a13
// a20 a30 a21 a31 a22 a32 a23 a33
// b00 b10 b01 b11 b02 b12 b03 b13
// b20 b30 b21 b31 b22 b32 b23 b33
Vector128<int> transpose10 = Sse2.UnpackLow(transpose00.AsInt32(), transpose01.AsInt32());
Vector128<int> transpose11 = Sse2.UnpackLow(transpose02.AsInt32(), transpose03.AsInt32());
Vector128<int> transpose12 = Sse2.UnpackHigh(transpose00.AsInt32(), transpose01.AsInt32());
Vector128<int> transpose13 = Sse2.UnpackHigh(transpose02.AsInt32(), transpose03.AsInt32());
// a00 a10 a20 a30 a01 a11 a21 a31
// b00 b10 b20 b30 b01 b11 b21 b31
// a02 a12 a22 a32 a03 a13 a23 a33
// b02 b12 a22 b32 b03 b13 b23 b33
output0 = Sse2.UnpackLow(transpose10.AsInt64(), transpose11.AsInt64());
output1 = Sse2.UnpackHigh(transpose10.AsInt64(), transpose11.AsInt64());
output2 = Sse2.UnpackLow(transpose12.AsInt64(), transpose13.AsInt64());
output3 = Sse2.UnpackHigh(transpose12.AsInt64(), transpose13.AsInt64());
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
}
#endif
public static void TransformTwo(Span<short> src, Span<byte> dst, Span<int> scratch)

6
src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs

@ -329,7 +329,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
LossyUtils.TransformWht(dcTmp, tmp, scratch);
for (n = 0; n < 16; n += 2)
{
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), true, scratch);
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), scratch);
}
return nz;
@ -342,7 +342,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Span<int> scratch = it.Scratch3.AsSpan(0, 16);
Vp8Encoding.FTransform(src, reference, tmp, scratch);
int nz = QuantizeBlock(tmp, levels, ref dqm.Y1);
Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch);
Vp8Encoding.ITransformOne(reference, tmp, yuvOut, scratch);
return nz;
}
@ -375,7 +375,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
for (n = 0; n < 8; n += 2)
{
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), true, scratch);
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), scratch);
}
return nz << 16;

8
src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs

@ -911,7 +911,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.LeftNz[8] = 0;
this.LeftDerr.AsSpan().Fill(0);
this.LeftDerr.AsSpan().Clear();
}
private void InitTop()
@ -919,14 +919,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int topSize = this.mbw * 16;
this.YTop.AsSpan(0, topSize).Fill(127);
this.UvTop.AsSpan().Fill(127);
this.Nz.AsSpan().Fill(0);
this.Nz.AsSpan().Clear();
int predsW = (4 * this.mbw) + 1;
int predsH = (4 * this.mbh) + 1;
int predsSize = predsW * predsH;
this.Preds.AsSpan(predsSize + this.predsWidth, this.mbw).Fill(0);
this.Preds.AsSpan(predsSize + this.predsWidth, this.mbw).Clear();
this.TopDerr.AsSpan().Fill(0);
this.TopDerr.AsSpan().Clear();
}
private int Bit(uint nz, int n) => (nz & (1 << n)) != 0 ? 1 : 0;

2
src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs

@ -546,7 +546,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int predsW = (4 * this.Mbw) + 1;
int predsH = (4 * this.Mbh) + 1;
int predsSize = predsW * predsH;
this.Preds.AsSpan(predsSize + this.PredsWidth - 4, 4).Fill(0);
this.Preds.AsSpan(predsSize + this.PredsWidth - 4, 4).Clear();
this.Nz[0] = 0; // constant
}

323
src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs

@ -4,6 +4,11 @@
using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
@ -60,6 +65,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public static readonly int[] Vp8I4ModeOffsets = { I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4 };
#if SUPPORTS_RUNTIME_INTRINSICS
public static readonly Vector128<short> K1 = Vector128.Create((short)20091).AsInt16();
public static readonly Vector128<short> K2 = Vector128.Create((short)-30068).AsInt16();
public static readonly Vector128<short> Four = Vector128.Create((short)4);
#endif
static Vp8Encoding()
{
for (int i = -255; i <= 255 + 255; i++)
@ -68,51 +81,299 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
}
}
public static void ITransform(Span<byte> reference, Span<short> input, Span<byte> dst, bool doTwo, Span<int> scratch)
// Transforms (Paragraph 14.4)
// Does two inverse transforms.
public static void ITransform(Span<byte> reference, Span<short> input, Span<byte> dst, Span<int> scratch)
{
ITransformOne(reference, input, dst, scratch);
if (doTwo)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
{
// This implementation makes use of 16-bit fixed point versions of two
// multiply constants:
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
// K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16
//
// To be able to use signed 16-bit integers, we use the following trick to
// have constants within range:
// - Associated constants are obtained by subtracting the 16-bit fixed point
// version of one:
// k = K - (1 << 16) => K = k + (1 << 16)
// K1 = 85267 => k1 = 20091
// K2 = 35468 => k2 = -30068
// - The multiplication of a variable by a constant become the sum of the
// variable and the multiplication of that variable by the associated
// constant:
// (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
// Load and concatenate the transform coefficients (we'll do two inverse
// transforms in parallel). In the case of only one inverse transform, the
// second half of the vectors will just contain random value we'll never
// use nor store.
ref short inputRef = ref MemoryMarshal.GetReference(input);
var in0 = Vector128.Create(Unsafe.As<short, long>(ref inputRef), 0);
var in1 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 4)), 0);
var in2 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 8)), 0);
var in3 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 12)), 0);
// a00 a10 a20 a30 x x x x
// a01 a11 a21 a31 x x x x
// a02 a12 a22 a32 x x x x
// a03 a13 a23 a33 x x x x
var inb0 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 16)), 0);
var inb1 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 20)), 0);
var inb2 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 24)), 0);
var inb3 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 28)), 0);
in0 = Sse2.UnpackLow(in0, inb0);
in1 = Sse2.UnpackLow(in1, inb1);
in2 = Sse2.UnpackLow(in2, inb2);
in3 = Sse2.UnpackLow(in3, inb3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
// Vertical pass and subsequent transpose.
// First pass, c and d calculations are longer because of the "trick" multiplications.
InverseTransformVerticalPass(in0, in2, in1, in3, out Vector128<short> tmp0, out Vector128<short> tmp1, out Vector128<short> tmp2, out Vector128<short> tmp3);
// Transpose the two 4x4.
LossyUtils.Vp8Transpose_2_4x4_16b(tmp0, tmp1, tmp2, tmp3, out Vector128<long> t0, out Vector128<long> t1, out Vector128<long> t2, out Vector128<long> t3);
// Horizontal pass and subsequent transpose.
// First pass, c and d calculations are longer because of the "trick" multiplications.
InverseTransformHorizontalPass(t0, t2, t1, t3, out Vector128<short> shifted0, out Vector128<short> shifted1, out Vector128<short> shifted2, out Vector128<short> shifted3);
// Transpose the two 4x4.
LossyUtils.Vp8Transpose_2_4x4_16b(shifted0, shifted1, shifted2, shifted3, out t0, out t1, out t2, out t3);
// Add inverse transform to 'ref' and store.
// Load the reference(s).
Vector128<byte> ref0 = Vector128<byte>.Zero;
Vector128<byte> ref1 = Vector128<byte>.Zero;
Vector128<byte> ref2 = Vector128<byte>.Zero;
Vector128<byte> ref3 = Vector128<byte>.Zero;
ref byte referenceRef = ref MemoryMarshal.GetReference(reference);
// Load eight bytes/pixels per line.
ref0 = Vector128.Create(Unsafe.As<byte, long>(ref referenceRef), 0).AsByte();
ref1 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps)), 0).AsByte();
ref2 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 2)), 0).AsByte();
ref3 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 3)), 0).AsByte();
// Convert to 16b.
ref0 = Sse2.UnpackLow(ref0, Vector128<byte>.Zero);
ref1 = Sse2.UnpackLow(ref1, Vector128<byte>.Zero);
ref2 = Sse2.UnpackLow(ref2, Vector128<byte>.Zero);
ref3 = Sse2.UnpackLow(ref3, Vector128<byte>.Zero);
// Add the inverse transform(s).
Vector128<short> ref0InvAdded = Sse2.Add(ref0.AsInt16(), t0.AsInt16());
Vector128<short> ref1InvAdded = Sse2.Add(ref1.AsInt16(), t1.AsInt16());
Vector128<short> ref2InvAdded = Sse2.Add(ref2.AsInt16(), t2.AsInt16());
Vector128<short> ref3InvAdded = Sse2.Add(ref3.AsInt16(), t3.AsInt16());
// Unsigned saturate to 8b.
ref0 = Sse2.PackUnsignedSaturate(ref0InvAdded, ref0InvAdded);
ref1 = Sse2.PackUnsignedSaturate(ref1InvAdded, ref1InvAdded);
ref2 = Sse2.PackUnsignedSaturate(ref2InvAdded, ref2InvAdded);
ref3 = Sse2.PackUnsignedSaturate(ref3InvAdded, ref3InvAdded);
// Unsigned saturate to 8b.
ref byte outputRef = ref MemoryMarshal.GetReference(dst);
// Store eight bytes/pixels per line.
Unsafe.As<byte, Vector64<byte>>(ref outputRef) = ref0.GetLower();
Unsafe.As<byte, Vector64<byte>>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps)) = ref1.GetLower();
Unsafe.As<byte, Vector64<byte>>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 2)) = ref2.GetLower();
Unsafe.As<byte, Vector64<byte>>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 3)) = ref3.GetLower();
}
else
#endif
{
ITransformOne(reference, input, dst, scratch);
ITransformOne(reference.Slice(4), input.Slice(16), dst.Slice(4), scratch);
}
}
public static void ITransformOne(Span<byte> reference, Span<short> input, Span<byte> dst, Span<int> scratch)
{
int i;
Span<int> tmp = scratch.Slice(0, 16);
for (i = 0; i < 4; i++)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
{
// vertical pass.
int a = input[0] + input[8];
int b = input[0] - input[8];
int c = Mul(input[4], KC2) - Mul(input[12], KC1);
int d = Mul(input[4], KC1) + Mul(input[12], KC2);
tmp[0] = a + d;
tmp[1] = b + c;
tmp[2] = b - c;
tmp[3] = a - d;
tmp = tmp.Slice(4);
input = input.Slice(1);
// Load and concatenate the transform coefficients (we'll do two inverse
// transforms in parallel). In the case of only one inverse transform, the
// second half of the vectors will just contain random value we'll never
// use nor store.
ref short inputRef = ref MemoryMarshal.GetReference(input);
var in0 = Vector128.Create(Unsafe.As<short, long>(ref inputRef), 0);
var in1 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 4)), 0);
var in2 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 8)), 0);
var in3 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 12)), 0);
// a00 a10 a20 a30 x x x x
// a01 a11 a21 a31 x x x x
// a02 a12 a22 a32 x x x x
// a03 a13 a23 a33 x x x x
// Vertical pass and subsequent transpose.
// First pass, c and d calculations are longer because of the "trick" multiplications.
InverseTransformVerticalPass(in0, in2, in1, in3, out Vector128<short> tmp0, out Vector128<short> tmp1, out Vector128<short> tmp2, out Vector128<short> tmp3);
// Transpose the two 4x4.
LossyUtils.Vp8Transpose_2_4x4_16b(tmp0, tmp1, tmp2, tmp3, out Vector128<long> t0, out Vector128<long> t1, out Vector128<long> t2, out Vector128<long> t3);
// Horizontal pass and subsequent transpose.
// First pass, c and d calculations are longer because of the "trick" multiplications.
InverseTransformHorizontalPass(t0, t2, t1, t3, out Vector128<short> shifted0, out Vector128<short> shifted1, out Vector128<short> shifted2, out Vector128<short> shifted3);
// Transpose the two 4x4.
LossyUtils.Vp8Transpose_2_4x4_16b(shifted0, shifted1, shifted2, shifted3, out t0, out t1, out t2, out t3);
// Add inverse transform to 'ref' and store.
// Load the reference(s).
Vector128<byte> ref0 = Vector128<byte>.Zero;
Vector128<byte> ref1 = Vector128<byte>.Zero;
Vector128<byte> ref2 = Vector128<byte>.Zero;
Vector128<byte> ref3 = Vector128<byte>.Zero;
ref byte referenceRef = ref MemoryMarshal.GetReference(reference);
// Load four bytes/pixels per line.
ref0 = Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref referenceRef)).AsByte();
ref1 = Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps))).AsByte();
ref2 = Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 2))).AsByte();
ref3 = Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 3))).AsByte();
// Convert to 16b.
ref0 = Sse2.UnpackLow(ref0, Vector128<byte>.Zero);
ref1 = Sse2.UnpackLow(ref1, Vector128<byte>.Zero);
ref2 = Sse2.UnpackLow(ref2, Vector128<byte>.Zero);
ref3 = Sse2.UnpackLow(ref3, Vector128<byte>.Zero);
// Add the inverse transform(s).
Vector128<short> ref0InvAdded = Sse2.Add(ref0.AsInt16(), t0.AsInt16());
Vector128<short> ref1InvAdded = Sse2.Add(ref1.AsInt16(), t1.AsInt16());
Vector128<short> ref2InvAdded = Sse2.Add(ref2.AsInt16(), t2.AsInt16());
Vector128<short> ref3InvAdded = Sse2.Add(ref3.AsInt16(), t3.AsInt16());
// Unsigned saturate to 8b.
ref0 = Sse2.PackUnsignedSaturate(ref0InvAdded, ref0InvAdded);
ref1 = Sse2.PackUnsignedSaturate(ref1InvAdded, ref1InvAdded);
ref2 = Sse2.PackUnsignedSaturate(ref2InvAdded, ref2InvAdded);
ref3 = Sse2.PackUnsignedSaturate(ref3InvAdded, ref3InvAdded);
// Unsigned saturate to 8b.
ref byte outputRef = ref MemoryMarshal.GetReference(dst);
// Store four bytes/pixels per line.
int output0 = Sse2.ConvertToInt32(ref0.AsInt32());
int output1 = Sse2.ConvertToInt32(ref1.AsInt32());
int output2 = Sse2.ConvertToInt32(ref2.AsInt32());
int output3 = Sse2.ConvertToInt32(ref3.AsInt32());
Unsafe.As<byte, int>(ref outputRef) = output0;
Unsafe.As<byte, int>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps)) = output1;
Unsafe.As<byte, int>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 2)) = output2;
Unsafe.As<byte, int>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 3)) = output3;
}
tmp = scratch;
for (i = 0; i < 4; i++)
else
#endif
{
// horizontal pass.
int dc = tmp[0] + 4;
int a = dc + tmp[8];
int b = dc - tmp[8];
int c = Mul(tmp[4], KC2) - Mul(tmp[12], KC1);
int d = Mul(tmp[4], KC1) + Mul(tmp[12], KC2);
Store(dst, reference, 0, i, a + d);
Store(dst, reference, 1, i, b + c);
Store(dst, reference, 2, i, b - c);
Store(dst, reference, 3, i, a - d);
tmp = tmp.Slice(1);
int i;
Span<int> tmp = scratch.Slice(0, 16);
for (i = 0; i < 4; i++)
{
// vertical pass.
int a = input[0] + input[8];
int b = input[0] - input[8];
int c = Mul(input[4], KC2) - Mul(input[12], KC1);
int d = Mul(input[4], KC1) + Mul(input[12], KC2);
tmp[0] = a + d;
tmp[1] = b + c;
tmp[2] = b - c;
tmp[3] = a - d;
tmp = tmp.Slice(4);
input = input.Slice(1);
}
tmp = scratch;
for (i = 0; i < 4; i++)
{
// horizontal pass.
int dc = tmp[0] + 4;
int a = dc + tmp[8];
int b = dc - tmp[8];
int c = Mul(tmp[4], KC2) - Mul(tmp[12], KC1);
int d = Mul(tmp[4], KC1) + Mul(tmp[12], KC2);
Store(dst, reference, 0, i, a + d);
Store(dst, reference, 1, i, b + c);
Store(dst, reference, 2, i, b - c);
Store(dst, reference, 3, i, a - d);
tmp = tmp.Slice(1);
}
}
}
#if SUPPORTS_RUNTIME_INTRINSICS
private static void InverseTransformVerticalPass(Vector128<long> in0, Vector128<long> in2, Vector128<long> in1, Vector128<long> in3, out Vector128<short> tmp0, out Vector128<short> tmp1, out Vector128<short> tmp2, out Vector128<short> tmp3)
{
Vector128<short> a = Sse2.Add(in0.AsInt16(), in2.AsInt16());
Vector128<short> b = Sse2.Subtract(in0.AsInt16(), in2.AsInt16());
// c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
Vector128<short> c1 = Sse2.MultiplyHigh(in1.AsInt16(), K2);
Vector128<short> c2 = Sse2.MultiplyHigh(in3.AsInt16(), K1);
Vector128<short> c3 = Sse2.Subtract(in1.AsInt16(), in3.AsInt16());
Vector128<short> c4 = Sse2.Subtract(c1, c2);
Vector128<short> c = Sse2.Add(c3, c4);
// d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
Vector128<short> d1 = Sse2.MultiplyHigh(in1.AsInt16(), K1);
Vector128<short> d2 = Sse2.MultiplyHigh(in3.AsInt16(), K2);
Vector128<short> d3 = Sse2.Add(in1.AsInt16(), in3.AsInt16());
Vector128<short> d4 = Sse2.Add(d1, d2);
Vector128<short> d = Sse2.Add(d3, d4);
// Second pass.
tmp0 = Sse2.Add(a, d);
tmp1 = Sse2.Add(b, c);
tmp2 = Sse2.Subtract(b, c);
tmp3 = Sse2.Subtract(a, d);
}
private static void InverseTransformHorizontalPass(Vector128<long> t0, Vector128<long> t2, Vector128<long> t1, Vector128<long> t3, out Vector128<short> shifted0, out Vector128<short> shifted1, out Vector128<short> shifted2, out Vector128<short> shifted3)
{
Vector128<short> dc = Sse2.Add(t0.AsInt16(), Four);
Vector128<short> a = Sse2.Add(dc, t2.AsInt16());
Vector128<short> b = Sse2.Subtract(dc, t2.AsInt16());
// c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
Vector128<short> c1 = Sse2.MultiplyHigh(t1.AsInt16(), K2);
Vector128<short> c2 = Sse2.MultiplyHigh(t3.AsInt16(), K1);
Vector128<short> c3 = Sse2.Subtract(t1.AsInt16(), t3.AsInt16());
Vector128<short> c4 = Sse2.Subtract(c1, c2);
Vector128<short> c = Sse2.Add(c3, c4);
// d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
Vector128<short> d1 = Sse2.MultiplyHigh(t1.AsInt16(), K1);
Vector128<short> d2 = Sse2.MultiplyHigh(t3.AsInt16(), K2);
Vector128<short> d3 = Sse2.Add(t1.AsInt16(), t3.AsInt16());
Vector128<short> d4 = Sse2.Add(d1, d2);
Vector128<short> d = Sse2.Add(d3, d4);
// Second pass.
Vector128<short> tmp0 = Sse2.Add(a, d);
Vector128<short> tmp1 = Sse2.Add(b, c);
Vector128<short> tmp2 = Sse2.Subtract(b, c);
Vector128<short> tmp3 = Sse2.Subtract(a, d);
shifted0 = Sse2.ShiftRightArithmetic(tmp0, 3);
shifted1 = Sse2.ShiftRightArithmetic(tmp1, 3);
shifted2 = Sse2.ShiftRightArithmetic(tmp2, 3);
shifted3 = Sse2.ShiftRightArithmetic(tmp3, 3);
}
#endif
public static void FTransform2(Span<byte> src, Span<byte> reference, Span<short> output, Span<short> output2, Span<int> scratch)
{
FTransform(src, reference, output, scratch);

3
src/ImageSharp/Formats/Webp/WebpLookupTables.cs

@ -239,7 +239,8 @@ namespace SixLabors.ImageSharp.Formats.Webp
}
};
public static readonly byte[] Norm =
// This uses C#'s compiler optimization to refer to assembly's static data directly.
public static ReadOnlySpan<byte> Norm => new byte[]
{
// renorm_sizes[i] = 8 - log2(i)
7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,

2
src/ImageSharp/Processing/Extensions/Normalization/HistogramEqualizationExtensions.cs

@ -16,7 +16,7 @@ namespace SixLabors.ImageSharp.Processing
/// <param name="source">The image this method extends.</param>
/// <returns>The <see cref="IImageProcessingContext"/> to allow chaining of operations.</returns>
public static IImageProcessingContext HistogramEqualization(this IImageProcessingContext source) =>
HistogramEqualization(source, HistogramEqualizationOptions.Default);
HistogramEqualization(source, new HistogramEqualizationOptions());
/// <summary>
/// Equalizes the histogram of an image to increases the contrast.

2
src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs

@ -396,6 +396,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
Numerics.Premultiply(sourceBuffer);
ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceBase, sourceBuffer.Length);
ref Vector4 targetStart = ref targetBase;

5
src/ImageSharp/Processing/Processors/Normalization/HistogramEqualizationOptions.cs

@ -8,11 +8,6 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
/// </summary>
public class HistogramEqualizationOptions
{
/// <summary>
/// Gets the default <see cref="HistogramEqualizationOptions"/> instance.
/// </summary>
public static HistogramEqualizationOptions Default { get; } = new HistogramEqualizationOptions();
/// <summary>
/// Gets or sets the histogram equalization method to use. Defaults to global histogram equalization.
/// </summary>

4
src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel}.cs

@ -22,7 +22,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Quantization
where TPixel : unmanaged, IPixel<TPixel>
{
private Rgba32[] rgbaPalette;
private readonly ColorDistanceCache cache;
// Do not make this readonly! Struct value would be always copied on non-readonly method calls.
private ColorDistanceCache cache;
private readonly Configuration configuration;
/// <summary>

0
tests/ImageSharp.Benchmarks/Codecs/DecodeBmp.cs → tests/ImageSharp.Benchmarks/Codecs/Bmp/DecodeBmp.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeBmp.cs → tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmp.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeBmpMultiple.cs → tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmpMultiple.cs

0
tests/ImageSharp.Benchmarks/Codecs/DecodeGif.cs → tests/ImageSharp.Benchmarks/Codecs/Gif/DecodeGif.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeGif.cs → tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGif.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeGifMultiple.cs → tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGifMultiple.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/CmykColorConversion.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/ColorConversionBenchmark.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/GrayscaleColorConversion.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/RgbColorConversion.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrColorConversion.cs

0
tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrForwardConverterBenchmark.cs

0
tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs → tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YccKColorConverter.cs

82
tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg.cs

@ -0,0 +1,82 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.IO;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg;
using SixLabors.ImageSharp.Tests;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
public class DecodeJpeg
{
private JpegDecoder decoder;
private MemoryStream preloadedImageStream;
private void GenericSetup(string imageSubpath)
{
this.decoder = new JpegDecoder();
byte[] bytes = File.ReadAllBytes(Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, imageSubpath));
this.preloadedImageStream = new MemoryStream(bytes);
}
private void GenericBechmark()
{
this.preloadedImageStream.Position = 0;
using Image img = this.decoder.Decode(Configuration.Default, this.preloadedImageStream);
}
[GlobalSetup(Target = nameof(JpegBaselineInterleaved444))]
public void SetupBaselineInterleaved444() =>
this.GenericSetup(TestImages.Jpeg.Baseline.Winter444_Interleaved);
[GlobalSetup(Target = nameof(JpegBaselineInterleaved420))]
public void SetupBaselineInterleaved420() =>
this.GenericSetup(TestImages.Jpeg.Baseline.Hiyamugi);
[GlobalSetup(Target = nameof(JpegBaseline400))]
public void SetupBaselineSingleComponent() =>
this.GenericSetup(TestImages.Jpeg.Baseline.Jpeg400);
[GlobalSetup(Target = nameof(JpegProgressiveNonInterleaved420))]
public void SetupProgressiveNoninterleaved420() =>
this.GenericSetup(TestImages.Jpeg.Progressive.Winter420_NonInterleaved);
[GlobalCleanup]
public void Cleanup()
{
this.preloadedImageStream.Dispose();
this.preloadedImageStream = null;
}
[Benchmark(Description = "Baseline 4:4:4 Interleaved")]
public void JpegBaselineInterleaved444() => this.GenericBechmark();
[Benchmark(Description = "Baseline 4:2:0 Interleaved")]
public void JpegBaselineInterleaved420() => this.GenericBechmark();
[Benchmark(Description = "Baseline 4:0:0 (grayscale)")]
public void JpegBaseline400() => this.GenericBechmark();
[Benchmark(Description = "Progressive 4:2:0 Non-Interleaved")]
public void JpegProgressiveNonInterleaved420() => this.GenericBechmark();
}
}
/*
BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042.1348 (20H2/October2020Update)
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET SDK=6.0.100-preview.3.21202.5
[Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
DefaultJob : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
| Method | Mean | Error | StdDev |
|------------------------------------ |----------:|----------:|----------:|
| 'Baseline 4:4:4 Interleaved' | 11.127 ms | 0.0659 ms | 0.0550 ms |
| 'Baseline 4:2:0 Interleaved' | 8.458 ms | 0.0289 ms | 0.0256 ms |
| 'Baseline 4:0:0 (grayscale)' | 1.550 ms | 0.0050 ms | 0.0044 ms |
| 'Progressive 4:2:0 Non-Interleaved' | 13.220 ms | 0.0449 ms | 0.0398 ms |
*/

0
tests/ImageSharp.Benchmarks/Codecs/DecodeFilteredPng.cs → tests/ImageSharp.Benchmarks/Codecs/Png/DecodeFilteredPng.cs

0
tests/ImageSharp.Benchmarks/Codecs/DecodePng.cs → tests/ImageSharp.Benchmarks/Codecs/Png/DecodePng.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs → tests/ImageSharp.Benchmarks/Codecs/Png/EncodeIndexedPng.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodePng.cs → tests/ImageSharp.Benchmarks/Codecs/Png/EncodePng.cs

0
tests/ImageSharp.Benchmarks/Codecs/DecodeTga.cs → tests/ImageSharp.Benchmarks/Codecs/Tga/DecodeTga.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeTga.cs → tests/ImageSharp.Benchmarks/Codecs/Tga/EncodeTga.cs

0
tests/ImageSharp.Benchmarks/Codecs/DecodeTiff.cs → tests/ImageSharp.Benchmarks/Codecs/Tiff/DecodeTiff.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeTiff.cs → tests/ImageSharp.Benchmarks/Codecs/Tiff/EncodeTiff.cs

0
tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs → tests/ImageSharp.Benchmarks/Codecs/Webp/DecodeWebp.cs

0
tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs → tests/ImageSharp.Benchmarks/Codecs/Webp/EncodeWebp.cs

7
tests/ImageSharp.Benchmarks/benchmark.sh

@ -1,7 +0,0 @@
#!/bin/bash
# Build in release mode
dotnet build -c Release -f netcoreapp2.0
# Run benchmarks
dotnet bin/Release/netcoreapp2.0/ImageSharp.Benchmarks.dll

5
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -183,9 +183,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Assert.Equal(expected, actual);
}
// This method has only 2 implementations:
// 1. AVX
// 2. Scalar
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic);
HwIntrinsics.AllowAll | HwIntrinsics.DisableHWIntrinsic);
}
private static float[] Create8x8ColorCropTestData()

26
tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs

@ -276,5 +276,31 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
seed,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
[Fact]
public void TransposeInplace()
{
static void RunTest()
{
short[] expected = Create8x8ShortData();
ReferenceImplementations.Transpose8x8(expected);
var block8x8 = default(Block8x8);
block8x8.LoadFrom(Create8x8ShortData());
block8x8.TransposeInplace();
short[] actual = new short[64];
block8x8.CopyTo(actual);
Assert.Equal(expected, actual);
}
// This method has only 1 implementation:
// 1. Scalar
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
HwIntrinsics.DisableHWIntrinsic);
}
}
}

209
tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs

@ -2,9 +2,6 @@
// Licensed under the Apache License, Version 2.0.
using System;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics.X86;
#endif
using SixLabors.ImageSharp.Formats.Jpeg.Components;
using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils;
using SixLabors.ImageSharp.Tests.TestUtilities;
@ -17,6 +14,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Trait("Format", "Jpg")]
public static class DCTTests
{
private const int MaxAllowedValue = short.MaxValue;
private const int MinAllowedValue = short.MinValue;
internal static Block8x8F CreateBlockFromScalar(float value)
{
Block8x8F result = default;
for (int i = 0; i < Block8x8F.Size; i++)
{
result[i] = value;
}
return result;
}
public class FastFloatingPoint : JpegFixture
{
public FastFloatingPoint(ITestOutputHelper output)
@ -24,130 +35,75 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
}
// Reference tests
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public void LLM_TransformIDCT_CompareToNonOptimized(int seed)
{
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
var srcBlock = Block8x8F.Load(sourceArray);
// reference
Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref srcBlock);
var temp = default(Block8x8F);
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp);
this.CompareBlocks(expected, srcBlock, 1f);
}
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public void LLM_TransformIDCT_CompareToAccurate(int seed)
{
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
// testee
// Part of the IDCT calculations is fused into the quantization step
// We must multiply input block with adjusted no-quantization matrix
// before applying IDCT
// Dequantization using unit matrix - no values are upscaled
Block8x8F dequantMatrix = CreateBlockFromScalar(1);
var srcBlock = Block8x8F.Load(sourceArray);
// This step is needed to apply adjusting multipliers to the input block
FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock);
// IDCT implementation tranforms blocks after transposition
srcBlock.TransposeInplace();
srcBlock.MultiplyInPlace(ref dequantMatrix);
var temp = default(Block8x8F);
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp);
// IDCT calculation
FastFloatingPointDCT.TransformIDCT(ref srcBlock);
this.CompareBlocks(expected, srcBlock, 1f);
}
// Inverse transform
[Theory]
[InlineData(1)]
[InlineData(2)]
public void IDCT8x4_LeftPart(int seed)
{
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
var destBlock = default(Block8x8F);
var expectedDest = new float[64];
// reference
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);
// testee
FastFloatingPointDCT.IDCT8x4_LeftPart(ref srcBlock, ref destBlock);
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public void IDCT8x4_RightPart(int seed)
[InlineData(3)]
public void LLM_TransformIDCT_CompareToAccurate(int seed)
{
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
var destBlock = default(Block8x8F);
var expectedDest = new float[64];
var srcBlock = Block8x8F.Load(sourceArray);
// reference
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock);
// testee
FastFloatingPointDCT.IDCT8x4_RightPart(ref srcBlock, ref destBlock);
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public void IDCT8x8_Avx(int seed)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (!Avx.IsSupported)
{
this.Output.WriteLine("No AVX present, skipping test!");
return;
}
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
Block8x8F srcBlock = default;
srcBlock.LoadFrom(src);
// Part of the IDCT calculations is fused into the quantization step
// We must multiply input block with adjusted no-quantization matrix
// before applying IDCT
// Dequantization using unit matrix - no values are upscaled
Block8x8F dequantMatrix = CreateBlockFromScalar(1);
Block8x8F destBlock = default;
// This step is needed to apply adjusting multipliers to the input block
FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
float[] expectedDest = new float[64];
// IDCT implementation tranforms blocks after transposition
srcBlock.TransposeInplace();
srcBlock.MultiplyInPlace(ref dequantMatrix);
// reference, left part
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);
// IDCT calculation
FastFloatingPointDCT.TransformIDCT(ref srcBlock);
// reference, right part
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
// testee, whole 8x8
FastFloatingPointDCT.IDCT8x8_Avx(ref srcBlock, ref destBlock);
float[] actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
#endif
this.CompareBlocks(expected, srcBlock, 1f);
}
// Inverse transform
// This test covers entire IDCT conversion chain
// This test checks all hardware implementations
[Theory]
[InlineData(1)]
[InlineData(2)]
@ -157,41 +113,53 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
int seed = FeatureTestRunner.Deserialize<int>(serialized);
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
Span<float> src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
var expectedDest = new float[64];
var temp1 = new float[64];
var temp2 = default(Block8x8F);
float[] expectedDest = new float[64];
float[] temp = new float[64];
// reference
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1);
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp);
// testee
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp2);
// Part of the IDCT calculations is fused into the quantization step
// We must multiply input block with adjusted no-quantization matrix
// before applying IDCT
Block8x8F dequantMatrix = CreateBlockFromScalar(1);
// Dequantization using unit matrix - no values are upscaled
// as quant matrix is all 1's
// This step is needed to apply adjusting multipliers to the input block
FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
srcBlock.MultiplyInPlace(ref dequantMatrix);
// IDCT implementation tranforms blocks after transposition
srcBlock.TransposeInplace();
var actualDest = new float[64];
srcBlock.ScaledCopyTo(actualDest);
// IDCT calculation
FastFloatingPointDCT.TransformIDCT(ref srcBlock);
float[] actualDest = srcBlock.ToArray();
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
// 3 paths:
// 4 paths:
// 1. AllowAll - call avx/fma implementation
// 2. DisableFMA - call avx implementation without fma acceleration
// 3. DisableAvx - call fallback code of Vector4 implementation
//
// DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result
// 2. DisableFMA - call avx without fma implementation
// 3. DisableAvx - call sse Vector4 implementation
// 4. DisableHWIntrinsic - call scalar fallback implementation
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
seed,
HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX);
HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic);
}
// Forward transform
// This test covers entire FDCT conversions chain
// This test checks all implementations: intrinsic and scalar fallback
// This test covers entire FDCT conversion chain
// This test checks all hardware implementations
[Theory]
[InlineData(1)]
[InlineData(2)]
@ -201,7 +169,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
int seed = FeatureTestRunner.Deserialize<int>(serialized);
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
Span<float> src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
var block = default(Block8x8F);
block.LoadFrom(src);
@ -212,23 +180,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true);
// testee
// Part of the FDCT calculations is fused into the quantization step
// We must multiply transformed block with reciprocal values from FastFloatingPointDCT.ANN_DCT_reciprocalAdjustmen
FastFloatingPointDCT.TransformFDCT(ref block);
for (int i = 0; i < 64; i++)
{
block[i] = block[i] * FastFloatingPointDCT.DctReciprocalAdjustmentCoefficients[i];
}
// Part of the IDCT calculations is fused into the quantization step
// We must multiply input block with adjusted no-quantization matrix
// after applying FDCT
Block8x8F quantMatrix = CreateBlockFromScalar(1);
FastFloatingPointDCT.AdjustToFDCT(ref quantMatrix);
block.MultiplyInPlace(ref quantMatrix);
float[] actualDest = block.ToArray();
Assert.Equal(expectedDest, actualDest, new ApproximateFloatComparer(1f));
}
// 3 paths:
// 4 paths:
// 1. AllowAll - call avx/fma implementation
// 2. DisableFMA - call avx implementation without fma acceleration
// 3. DisableAvx - call sse implementation
// 2. DisableFMA - call avx without fma implementation
// 3. DisableAvx - call sse Vector4 implementation
// 4. DisableHWIntrinsic - call scalar fallback implementation
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,

2
tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Images.cs

@ -20,6 +20,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
TestImages.Jpeg.Baseline.Jpeg420Small,
TestImages.Jpeg.Issues.Fuzz.AccessViolationException922,
TestImages.Jpeg.Baseline.Jpeg444,
TestImages.Jpeg.Baseline.Jpeg422,
TestImages.Jpeg.Baseline.Bad.BadEOF,
TestImages.Jpeg.Baseline.MultiScanBaselineCMYK,
TestImages.Jpeg.Baseline.YcckSubsample1222,
@ -100,6 +101,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[TestImages.Jpeg.Baseline.Bad.BadEOF] = 0.38f / 100,
[TestImages.Jpeg.Baseline.Bad.BadRST] = 0.0589f / 100,
[TestImages.Jpeg.Baseline.Jpeg422] = 0.0013f / 100,
[TestImages.Jpeg.Baseline.Testorig420] = 0.38f / 100,
[TestImages.Jpeg.Baseline.Jpeg420Small] = 0.287f / 100,
[TestImages.Jpeg.Baseline.Turtle420] = 1.0f / 100,

2
tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Metadata.cs

@ -56,7 +56,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{ TestImages.Jpeg.Progressive.Fb, 75 },
{ TestImages.Jpeg.Issues.IncorrectQuality845, 98 },
{ TestImages.Jpeg.Baseline.ForestBridgeDifferentComponentsQuality, 89 },
{ TestImages.Jpeg.Progressive.Winter, 80 }
{ TestImages.Jpeg.Progressive.Winter420_NonInterleaved, 80 }
};
[Theory]

2
tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs

@ -172,7 +172,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
bool failed = false;
for (int i = 0; i < 64; i++)
for (int i = 0; i < Block8x8F.Size; i++)
{
float expected = a[i];
float actual = b[i];

15
tests/ImageSharp.Tests/Formats/Jpg/Utils/LibJpegTools.ComponentData.cs

@ -48,6 +48,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
public short MaxVal { get; private set; } = short.MinValue;
internal void MakeBlock(Block8x8 block, int y, int x)
{
block.TransposeInplace();
this.MakeBlock(block.ToArray(), y, x);
}
internal void MakeBlock(short[] data, int y, int x)
{
this.MinVal = Math.Min(this.MinVal, data.Min());
@ -66,11 +72,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
Span<Block8x8> blockRow = data.DangerousGetRowSpan(y - startIndex);
for (int x = 0; x < this.WidthInBlocks; x++)
{
short[] block = blockRow[x].ToArray();
// x coordinate stays the same - we load entire stride
// y coordinate is tricky as we load single stride to full buffer - offset is needed
this.MakeBlock(block, y, x);
this.MakeBlock(blockRow[x], y, x);
}
}
}
@ -83,8 +85,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
Span<Block8x8> blockRow = data.DangerousGetRowSpan(y);
for (int x = 0; x < this.WidthInBlocks; x++)
{
short[] block = blockRow[x].ToArray();
this.MakeBlock(block, y, x);
this.MakeBlock(blockRow[x], y, x);
}
}
}

17
tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs

@ -40,6 +40,23 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
}
}
/// <summary>
/// Transpose 8x8 block stored linearly in a <see cref="Span{T}"/> (inplace)
/// </summary>
internal static void Transpose8x8(Span<short> data)
{
for (int i = 1; i < 8; i++)
{
int i8 = i * 8;
for (int j = 0; j < i; j++)
{
short tmp = data[i8 + j];
data[i8 + j] = data[(j * 8) + i];
data[(j * 8) + i] = tmp;
}
}
}
/// <summary>
/// Transpose 8x8 block stored linearly in a <see cref="Span{T}"/>
/// </summary>

14
tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs

@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
using Xunit;
@ -9,8 +10,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Trait("Format", "Jpg")]
public class ZigZagTests
{
[Fact]
public void ZigZagCanHandleAllPossibleCoefficients()
private static void CanHandleAllPossibleCoefficients(ReadOnlySpan<byte> order)
{
// Mimic the behaviour of the huffman scan decoder using all possible byte values
short[] block = new short[64];
@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
if (s != 0)
{
i += r;
block[ZigZag.ZigZagOrder[i++]] = (short)s;
block[order[i++]] = (short)s;
}
else
{
@ -40,5 +40,13 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
}
}
}
[Fact]
public static void ZigZagCanHandleAllPossibleCoefficients() =>
CanHandleAllPossibleCoefficients(ZigZag.ZigZagOrder);
[Fact]
public static void TrasposingZigZagCanHandleAllPossibleCoefficients() =>
CanHandleAllPossibleCoefficients(ZigZag.TransposingOrder);
}
}

92
tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs

@ -0,0 +1,92 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using SixLabors.ImageSharp.Formats.Webp.Lossless;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.Webp
{
[Trait("Format", "Webp")]
public class ColorSpaceTransformUtilsTests
{
private static void RunCollectColorBlueTransformsTest()
{
uint[] pixelData =
{
3074, 256, 256, 256, 0, 65280, 65280, 65280, 256, 256, 0, 256, 0, 65280, 0, 65280, 16711680, 256,
256, 0, 65024, 0, 256, 256, 0, 65280, 0, 65280, 0, 256, 0, 256
};
int[] expectedOutput =
{
31, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
int[] histo = new int[256];
ColorSpaceTransformUtils.CollectColorBlueTransforms(pixelData, 0, 32, 1, 0, 0, histo);
Assert.Equal(expectedOutput, histo);
}
private static void RunCollectColorRedTransformsTest()
{
uint[] pixelData =
{
3074, 256, 256, 256, 0, 65280, 65280, 65280, 256, 256, 0, 256, 0, 65280, 0, 65280, 16711680, 256,
256, 0, 65024, 0, 256, 256, 0, 65280, 0, 65280, 0, 256, 0, 256
};
int[] expectedOutput =
{
31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
};
int[] histo = new int[256];
ColorSpaceTransformUtils.CollectColorRedTransforms(pixelData, 0, 32, 1, 0, histo);
Assert.Equal(expectedOutput, histo);
}
[Fact]
public void CollectColorBlueTransforms_Works() => RunCollectColorBlueTransformsTest();
[Fact]
public void CollectColorRedTransforms_Works() => RunCollectColorRedTransformsTest();
#if SUPPORTS_RUNTIME_INTRINSICS
[Fact]
public void CollectColorBlueTransforms_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorBlueTransformsTest, HwIntrinsics.AllowAll);
[Fact]
public void CollectColorBlueTransforms_WithoutSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorBlueTransformsTest, HwIntrinsics.DisableSSE41);
[Fact]
public void CollectColorBlueTransforms_WithoutAvx2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorBlueTransformsTest, HwIntrinsics.DisableAVX2);
[Fact]
public void CollectColorRedTransforms_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorRedTransformsTest, HwIntrinsics.AllowAll);
[Fact]
public void CollectColorRedTransforms_WithoutSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorRedTransformsTest, HwIntrinsics.DisableSSE41);
[Fact]
public void CollectColorRedTransforms_WithoutAvx2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorRedTransformsTest, HwIntrinsics.DisableAVX2);
#endif
}
}

6
tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs

@ -257,11 +257,17 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
[Fact]
public void TransformColor_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.DisableSSE2);
[Fact]
public void TransformColor_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.DisableAVX2);
[Fact]
public void TransformColorInverse_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorInverseTest, HwIntrinsics.AllowAll);
[Fact]
public void TransformColorInverse_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorInverseTest, HwIntrinsics.DisableSSE2);
[Fact]
public void TransformColorInverse_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorInverseTest, HwIntrinsics.DisableAVX2);
#endif
}
}

4
tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs

@ -6,7 +6,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP
namespace SixLabors.ImageSharp.Tests.Formats.Webp
{
[Trait("Format", "Webp")]
public class LossyUtilsTests
@ -38,7 +38,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP
int actual = LossyUtils.Vp8_Sse4X4(a, b);
Assert.Equal(expected, actual);
}
}
private static void RunMean16x4Test()
{

6
tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs

@ -40,8 +40,13 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
[Fact]
public void ColorSpaceTransform_WithBikeImage_WithoutSSE41_Works()
=> FeatureTestRunner.RunWithHwIntrinsicsFeature(ColorSpaceTransform_WithBikeImage_ProducesExpectedData, HwIntrinsics.DisableSSE41);
[Fact]
public void ColorSpaceTransform_WithBikeImage_WithoutAvx2_Works()
=> FeatureTestRunner.RunWithHwIntrinsicsFeature(ColorSpaceTransform_WithBikeImage_ProducesExpectedData, HwIntrinsics.DisableAVX2);
#endif
// Test image: Input\Webp\peak.png
private static void RunColorSpaceTransformTestWithPeakImage()
{
// arrange
@ -99,6 +104,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
Assert.Equal(expectedData, transformData);
}
// Test image: Input\Png\Bike.png
private static void RunColorSpaceTransformTestWithBikeImage()
{
// arrange

2
tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs

@ -6,7 +6,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP
namespace SixLabors.ImageSharp.Tests.Formats.Webp
{
[Trait("Format", "Webp")]
public class QuantEncTests

98
tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs

@ -0,0 +1,98 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Linq;
using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.Webp
{
[Trait("Format", "Webp")]
public class Vp8EncodingTests
{
private static void RunOneInverseTransformTest()
{
// arrange
byte[] reference =
{
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129,
129, 129, 129, 129, 129, 129, 129, 129
};
short[] input = { 1, 216, -48, 0, 96, -24, -48, 24, 0, -24, 24, 0, 0, 0, 0, 0, 38, -240, -72, -24, 0, -24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
byte[] dst = new byte[128];
byte[] expected =
{
161, 160, 149, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 160, 160, 133, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 156, 147, 109, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 152, 128, 87, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0
};
int[] scratch = new int[16];
// act
Vp8Encoding.ITransformOne(reference, input, dst, scratch);
// assert
Assert.True(dst.SequenceEqual(expected));
}
private static void RunTwoInverseTransformTest()
{
// arrange
byte[] reference =
{
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129,
129, 129, 129, 129, 129, 129, 129, 129
};
short[] input = { 1, 216, -48, 0, 96, -24, -48, 24, 0, -24, 24, 0, 0, 0, 0, 0, 38, -240, -72, -24, 0, -24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
byte[] dst = new byte[128];
byte[] expected =
{
161, 160, 149, 105, 78, 127, 156, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 160, 160, 133, 85, 81, 129, 155, 167, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 156, 147, 109, 76, 85, 130, 153, 163, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 152, 128, 87, 83, 88, 132, 152, 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
int[] scratch = new int[16];
// act
Vp8Encoding.ITransform(reference, input, dst, scratch);
// assert
Assert.True(dst.SequenceEqual(expected));
}
[Fact]
public void OneInverseTransform_Works() => RunOneInverseTransformTest();
[Fact]
public void TwoInverseTransform_Works() => RunTwoInverseTransformTest();
#if SUPPORTS_RUNTIME_INTRINSICS
[Fact]
public void OneInverseTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunOneInverseTransformTest, HwIntrinsics.AllowAll);
[Fact]
public void OneInverseTransform_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunOneInverseTransformTest, HwIntrinsics.DisableHWIntrinsic);
[Fact]
public void TwoInverseTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTwoInverseTransformTest, HwIntrinsics.AllowAll);
[Fact]
public void TwoInverseTransform_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTwoInverseTransformTest, HwIntrinsics.DisableHWIntrinsic);
#endif
}
}

9
tests/ImageSharp.Tests/Processing/Processors/Convolution/Basic1ParameterConvolutionTests.cs

@ -17,10 +17,11 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution
public static readonly TheoryData<int> Values = new TheoryData<int> { 3, 5 };
public static readonly string[] InputImages =
{
TestImages.Bmp.Car,
TestImages.Png.CalliphoraPartial
};
{
TestImages.Bmp.Car,
TestImages.Png.CalliphoraPartial,
TestImages.Png.Blur
};
[Theory]
[WithFileCollection(nameof(InputImages), nameof(Values), PixelTypes.Rgba32)]

3
tests/ImageSharp.Tests/TestImages.cs

@ -163,7 +163,7 @@ namespace SixLabors.ImageSharp.Tests
public const string Fb = "Jpg/progressive/fb.jpg";
public const string Progress = "Jpg/progressive/progress.jpg";
public const string Festzug = "Jpg/progressive/Festzug.jpg";
public const string Winter = "Jpg/progressive/winter.jpg";
public const string Winter420_NonInterleaved = "Jpg/progressive/winter420_noninterleaved.jpg";
public static class Bad
{
@ -213,6 +213,7 @@ namespace SixLabors.ImageSharp.Tests
public const string ArithmeticCoding = "Jpg/baseline/arithmetic_coding.jpg";
public const string ArithmeticCodingProgressive = "Jpg/progressive/arithmetic_progressive.jpg";
public const string Lossless = "Jpg/baseline/lossless.jpg";
public const string Winter444_Interleaved = "Jpg/baseline/winter444_interleaved.jpg";
public static readonly string[] All =
{

3
tests/Images/External/ReferenceOutput/Convolution/BoxBlurTest/InBox_Rgba32_blur_3.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c6dd45683953e7cecbbaaa339b78db1303f9583b8d0988fe1948c6b1b4ba297a
size 121550

3
tests/Images/External/ReferenceOutput/Convolution/BoxBlurTest/InBox_Rgba32_blur_5.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a3867cbbc1d425ceba20dd392de0728ce4de652860491e87434cd33675f56d8e
size 117863

3
tests/Images/External/ReferenceOutput/Convolution/BoxBlurTest/OnFullImage_Rgba32_blur_3.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:186c35bc159c7125f59b47866021051ff74368b9021dd09ad3c6386b39be3546
size 80992

3
tests/Images/External/ReferenceOutput/Convolution/BoxBlurTest/OnFullImage_Rgba32_blur_5.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9d7413d1d7ac69feb1d1f0a61d0d4a8228d3276337446d2c761ce58b0813cf66
size 67243

3
tests/Images/External/ReferenceOutput/Convolution/GaussianBlurTest/InBox_Rgba32_blur_3.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ace61fd7330b5e52b7aa09af937259d200b71fa152bf1ffdc6b891e5b61abfd5
size 117133

3
tests/Images/External/ReferenceOutput/Convolution/GaussianBlurTest/InBox_Rgba32_blur_5.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bc2f26bda2dec8354d8b77887806012f28f54b8a8f7e39e7e4bcb4d872d29042
size 114247

3
tests/Images/External/ReferenceOutput/Convolution/GaussianBlurTest/OnFullImage_Rgba32_blur_3.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3aac58316fa795c2683f7cfac34f69ba71501abd78e0d72076cc36c439a8fa7a
size 63680

3
tests/Images/External/ReferenceOutput/Convolution/GaussianBlurTest/OnFullImage_Rgba32_blur_5.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e7bf28351fa51e0e9b0c2fd4b3fc7a30b0b3a8c1ca2dc9dd62ec5fab56e22c10
size 50451

3
tests/Images/External/ReferenceOutput/Convolution/GaussianSharpenTest/InBox_Rgba32_blur_3.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b6eecdf3bf90a2dd9430ce8501ab98f7a25f4f06674673fd6b9ca6a44435d303
size 239962

3
tests/Images/External/ReferenceOutput/Convolution/GaussianSharpenTest/InBox_Rgba32_blur_5.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3cc3a46595d648a4551f499e1246ccdb63a80f424487fb7306fd3cfd772f5f1e
size 238816

3
tests/Images/External/ReferenceOutput/Convolution/GaussianSharpenTest/OnFullImage_Rgba32_blur_3.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:59ca62ae017d8f5a19dbd0f61ded29d936c325553eb3e08fe39f2440d4c941eb
size 356290

3
tests/Images/External/ReferenceOutput/Convolution/GaussianSharpenTest/OnFullImage_Rgba32_blur_5.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:427d325ace605fe9a22702dcd8bff20dff888293def6569c4dc635b56c732565
size 351992

3
tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg422.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:733cc46271c4402974db2536a55e6ecae3110856df73031ca48dad03745d852d
size 35375

3
tests/Images/Input/Jpg/baseline/winter444_interleaved.jpg

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:73b1deb4e2fb8027f6bb4fb293e5b2615c80b3ac0a7f99fd90118fd340a9fd12
size 283330

0
tests/Images/Input/Jpg/progressive/winter.jpg → tests/Images/Input/Jpg/progressive/winter420_noninterleaved.jpg

Loading…
Cancel
Save