Browse Source

New zig-zag implementation

pull/1761/head
Dmitry Pentin 5 years ago
parent
commit
6c5cf28ecd
  1. 2
      src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs
  2. 87
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs
  3. 2
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs
  4. 138
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
  5. 17
      src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs
  6. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs
  7. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
  8. 43
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
  9. 67
      src/ImageSharp/Formats/Jpeg/Components/Quantization.cs
  10. 404
      src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs
  11. 79
      src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs
  12. 6
      src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs
  13. 12
      src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
  14. 74
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
  15. 8
      tests/ImageSharp.Tests/Formats/Jpg/QuantizationTests.cs
  16. 54
      tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs
  17. 5
      tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs

2
src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs

@ -12,7 +12,7 @@ using System.Text;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
/// <summary>
/// 8x8 coefficients matrix of <see cref="short"/> type.
/// 8x8 matrix of <see cref="short"/> coefficients.
/// </summary>
// ReSharper disable once InconsistentNaming
[StructLayout(LayoutKind.Explicit)]

87
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs

@ -0,0 +1,87 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
/// <summary>
/// Number of rows in a <see cref="Block8x8F"/>; each row holds 8 scalar coefficients.
/// </summary>
public const int RowCount = 8;
// Row views of the block: one 256-bit vector (8 floats, 32 bytes) per row,
// placed back to back through explicit field offsets 0, 32, ..., 224.
// NOTE(review): these presumably overlay the Vector4 half-row fields declared in the
// other part of this partial struct (e.g. V7R sits at offset 240) - confirm there.
[FieldOffset(0)]
public Vector256<float> V0;
[FieldOffset(32)]
public Vector256<float> V1;
[FieldOffset(64)]
public Vector256<float> V2;
[FieldOffset(96)]
public Vector256<float> V3;
[FieldOffset(128)]
public Vector256<float> V4;
[FieldOffset(160)]
public Vector256<float> V5;
[FieldOffset(192)]
public Vector256<float> V6;
[FieldOffset(224)]
public Vector256<float> V7;
/// <summary>
/// Divides <paramref name="a"/> by <paramref name="b"/> element-wise, converts the quotients
/// to 32-bit integers and stores them as saturated 16-bit integers in <paramref name="dest"/>,
/// keeping the element order of the inputs.
/// </summary>
/// <remarks>
/// Requires Avx2 support.
/// </remarks>
/// <param name="a">Block of dividends.</param>
/// <param name="b">Block of divisors.</param>
/// <param name="dest">Block receiving the 16-bit quotients.</param>
private static void DivideIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");

    // Built from constants instead of a 'ReadOnlySpan<int> => new int[] { ... }' property:
    // only byte-sized element types are embedded as static data, so the int[] variant
    // allocated a fresh array (and required pinning) on every call.
    Vector256<int> crossLaneShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);

    ref Vector256<float> aBase = ref Unsafe.As<Block8x8F, Vector256<float>>(ref a);
    ref Vector256<float> bBase = ref Unsafe.As<Block8x8F, Vector256<float>>(ref b);
    ref Vector256<short> destBase = ref Unsafe.As<Block8x8, Vector256<short>>(ref dest);

    // Each iteration consumes two float rows and produces one 256-bit vector holding two int16 rows.
    for (int i = 0; i < 8; i += 2)
    {
        Vector256<int> row0 = Avx.ConvertToVector256Int32(Avx.Divide(Unsafe.Add(ref aBase, i + 0), Unsafe.Add(ref bBase, i + 0)));
        Vector256<int> row1 = Avx.ConvertToVector256Int32(Avx.Divide(Unsafe.Add(ref aBase, i + 1), Unsafe.Add(ref bBase, i + 1)));

        // The pack works per 128-bit lane and yields [row0.lo, row1.lo | row0.hi, row1.hi];
        // the 32-bit permute restores [row0.lo, row0.hi | row1.lo, row1.hi].
        Vector256<short> row = Avx2.PackSignedSaturate(row0, row1);
        row = Avx2.PermuteVar8x32(row.AsInt32(), crossLaneShuffleMask).AsInt16();

        Unsafe.Add(ref destBase, i / 2) = row;
    }
}
/// <summary>
/// Divides <paramref name="a"/> by <paramref name="b"/> element-wise, converts the quotients
/// to 32-bit integers and stores them as saturated 16-bit integers in <paramref name="dest"/>.
/// </summary>
/// <param name="a">Block of dividends.</param>
/// <param name="b">Block of divisors.</param>
/// <param name="dest">Block receiving the 16-bit quotients.</param>
private static void DivideIntoInt16_Sse2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Sse2.IsSupported, "Sse2 support is required to run this operation!");

    // View the float blocks as 16 vectors of 4 floats and the short block as 8 vectors of 8 shorts.
    ref Vector128<float> dividends = ref Unsafe.As<Block8x8F, Vector128<float>>(ref a);
    ref Vector128<float> divisors = ref Unsafe.As<Block8x8F, Vector128<float>>(ref b);
    ref Vector128<short> packedRows = ref Unsafe.As<Block8x8, Vector128<short>>(ref dest);

    for (int rowIndex = 0; rowIndex < 8; rowIndex++)
    {
        // Every 8-element row is made of two consecutive 4-float halves.
        int leftHalf = rowIndex * 2;
        int rightHalf = leftHalf + 1;

        Vector128<float> leftQuotient = Sse.Divide(Unsafe.Add(ref dividends, leftHalf), Unsafe.Add(ref divisors, leftHalf));
        Vector128<float> rightQuotient = Sse.Divide(Unsafe.Add(ref dividends, rightHalf), Unsafe.Add(ref divisors, rightHalf));

        Vector128<int> leftInts = Sse2.ConvertToVector128Int32(leftQuotient);
        Vector128<int> rightInts = Sse2.ConvertToVector128Int32(rightQuotient);

        // Narrow both halves to int16 with saturation and emit the complete row.
        Unsafe.Add(ref packedRows, rowIndex) = Sse2.PackSignedSaturate(leftInts, rightInts);
    }
}
}
}
#endif

2
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Numerics;

138
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

@ -16,7 +16,7 @@ using System.Text;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
/// <summary>
/// 8x8 coefficients matrix of <see cref="float"/> type.
/// 8x8 matrix of <see cref="float"/> coefficients.
/// </summary>
[StructLayout(LayoutKind.Explicit)]
internal partial struct Block8x8F : IEquatable<Block8x8F>
@ -66,30 +66,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public Vector4 V7L;
[FieldOffset(240)]
public Vector4 V7R;
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// A number of rows of 8 scalar coefficients each in <see cref="Block8x8F"/>
/// </summary>
public const int RowCount = 8;
[FieldOffset(0)]
public Vector256<float> V0;
[FieldOffset(32)]
public Vector256<float> V1;
[FieldOffset(64)]
public Vector256<float> V2;
[FieldOffset(96)]
public Vector256<float> V3;
[FieldOffset(128)]
public Vector256<float> V4;
[FieldOffset(160)]
public Vector256<float> V5;
[FieldOffset(192)]
public Vector256<float> V6;
[FieldOffset(224)]
public Vector256<float> V7;
#endif
#pragma warning restore SA1600 // ElementsMustBeDocumented
/// <summary>
@ -188,13 +164,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
return result;
}
/// <summary>
/// Fill the block with defaults (zeroes).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public void Clear()
=> this = default; // The cheapest way to do this in C#:
/// <summary>
/// Load raw 32bit floating point data from source.
/// </summary>
@ -302,7 +271,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public float[] ToArray()
{
var result = new float[Size];
float[] result = new float[Size];
this.ScaledCopyTo(result);
return result;
}
@ -434,102 +403,37 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
/// <summary>
/// Quantize the block.
/// </summary>
/// <param name="blockPtr">The block pointer.</param>
/// <param name="qtPtr">The qt pointer.</param>
/// <param name="unzigPtr">Unzig pointer</param>
public static unsafe void DequantizeBlock(Block8x8F* blockPtr, Block8x8F* qtPtr, byte* unzigPtr)
{
float* b = (float*)blockPtr;
float* qtp = (float*)qtPtr;
for (int qtIndex = 0; qtIndex < Size; qtIndex++)
{
byte blockIndex = unzigPtr[qtIndex];
float* unzigPos = b + blockIndex;
float val = *unzigPos;
val *= qtp[qtIndex];
*unzigPos = val;
}
}
/// <summary>
/// Quantize 'block' into 'dest' using the 'qt' quantization table:
/// Unzig the elements of block into dest, while dividing them by elements of qt and "pre-rounding" the values.
/// To finish the rounding it's enough to (int)-cast these values.
/// Quantize input block, apply zig-zag ordering and store result as 16bit integers.
/// </summary>
/// <param name="block">Source block</param>
/// <param name="dest">Destination block</param>
/// <param name="qt">The quantization table</param>
/// <param name="unZig">The 8x8 Unzig block.</param>
public static unsafe void Quantize(
ref Block8x8F block,
ref Block8x8F dest,
ref Block8x8F qt,
ref ZigZag unZig)
/// <param name="block">Source block.</param>
/// <param name="dest">Destination block.</param>
/// <param name="qt">The quantization table.</param>
public static void Quantize(ref Block8x8F block, ref Block8x8 dest, ref Block8x8F qt)
{
for (int zig = 0; zig < Size; zig++)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
dest[zig] = block[unZig[zig]];
DivideIntoInt16_Avx2(ref block, ref qt, ref dest);
ZigZag.ApplyZigZagOrderingAvx(ref dest, ref dest);
}
DivideRoundAll(ref dest, ref qt);
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
else if (Ssse3.IsSupported)
{
var vnegOne = Vector256.Create(-1f);
var vadd = Vector256.Create(.5F);
var vone = Vector256.Create(1f);
for (int i = 0; i < RowCount; i++)
{
ref Vector256<float> aRow = ref Unsafe.Add(ref a.V0, i);
ref Vector256<float> bRow = ref Unsafe.Add(ref b.V0, i);
Vector256<float> voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aRow), vone), vadd);
aRow = Avx.Add(Avx.Divide(aRow, bRow), voff);
}
DivideIntoInt16_Sse2(ref block, ref qt, ref dest);
ZigZag.ApplyZigZagOrderingSse(ref dest, ref dest);
}
else
#endif
{
a.V0L = DivideRound(a.V0L, b.V0L);
a.V0R = DivideRound(a.V0R, b.V0R);
a.V1L = DivideRound(a.V1L, b.V1L);
a.V1R = DivideRound(a.V1R, b.V1R);
a.V2L = DivideRound(a.V2L, b.V2L);
a.V2R = DivideRound(a.V2R, b.V2R);
a.V3L = DivideRound(a.V3L, b.V3L);
a.V3R = DivideRound(a.V3R, b.V3R);
a.V4L = DivideRound(a.V4L, b.V4L);
a.V4R = DivideRound(a.V4R, b.V4R);
a.V5L = DivideRound(a.V5L, b.V5L);
a.V5R = DivideRound(a.V5R, b.V5R);
a.V6L = DivideRound(a.V6L, b.V6L);
a.V6R = DivideRound(a.V6R, b.V6R);
a.V7L = DivideRound(a.V7L, b.V7L);
a.V7R = DivideRound(a.V7R, b.V7R);
for (int i = 0; i < Size; i++)
{
// TODO: find a way to index block & qt matrices with natural order indices for performance?
int zig = ZigZag.ZigZagOrder[i];
float divRes = block[zig] / qt[zig];
dest[i] = (short)(divRes + (divRes > 0 ? 0.5f : -0.5f));
}
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
{
var neg = new Vector4(-1);
var add = new Vector4(.5F);
// sign(dividend) = max(min(dividend, 1), -1)
Vector4 sign = Numerics.Clamp(dividend, neg, Vector4.One);
// AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend)
return (dividend / divisor) + (sign * add);
}
public void RoundInto(ref Block8x8 dest)
{
for (int i = 0; i < Size; i++)

17
src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs

@ -54,9 +54,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// </summary>
private readonly HuffmanTable[] acHuffmanTables;
// The unzig data.
private ZigZag dctZigZag;
private HuffmanScanBuffer scanBuffer;
private readonly SpectralConverter spectralConverter;
@ -74,7 +71,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
SpectralConverter converter,
CancellationToken cancellationToken)
{
this.dctZigZag = ZigZag.CreateUnzigTable();
this.stream = stream;
this.spectralConverter = converter;
this.cancellationToken = cancellationToken;
@ -477,7 +473,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
ref short blockDataRef = ref Unsafe.As<Block8x8, short>(ref block);
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
ref ZigZag zigzag = ref this.dctZigZag;
// DC
int t = buffer.DecodeHuffman(ref dcTable);
@ -502,7 +497,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
i += r;
s = buffer.Receive(s);
Unsafe.Add(ref blockDataRef, zigzag[i++]) = (short)s;
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i++]) = (short)s;
}
else
{
@ -556,7 +551,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
}
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
ref ZigZag zigzag = ref this.dctZigZag;
int start = this.SpectralStart;
int end = this.SpectralEnd;
int low = this.SuccessiveLow;
@ -572,7 +566,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if (s != 0)
{
s = buffer.Receive(s);
Unsafe.Add(ref blockDataRef, zigzag[i]) = (short)(s << low);
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i]) = (short)(s << low);
}
else
{
@ -602,7 +596,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
// Refinement scan for these AC coefficients
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
ref ZigZag zigzag = ref this.dctZigZag;
int start = this.SpectralStart;
int end = this.SpectralEnd;
@ -649,7 +642,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
do
{
ref short coef = ref Unsafe.Add(ref blockDataRef, zigzag[k]);
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]);
if (coef != 0)
{
buffer.CheckBits();
@ -675,7 +668,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if ((s != 0) && (k < 64))
{
Unsafe.Add(ref blockDataRef, zigzag[k]) = (short)s;
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]) = (short)s;
}
}
}
@ -684,7 +677,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
for (; k <= end; k++)
{
ref short coef = ref Unsafe.Add(ref blockDataRef, zigzag[k]);
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]);
if (coef != 0)
{

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs

@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
IJpegComponent[] Components { get; }
/// <summary>
/// Gets the quantization tables, in zigzag order.
/// Gets the quantization tables, in natural order.
/// </summary>
Block8x8F[] QuantizationTables { get; }
}

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs

@ -46,7 +46,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
public JpegBlockPostProcessor(IRawJpegData decoder, IJpegComponent component)
{
int qtIndex = component.QuantizationTableIndex;
this.DequantiazationTable = ZigZag.CreateDequantizationTable(ref decoder.QuantizationTables[qtIndex]);
this.DequantiazationTable = decoder.QuantizationTables[qtIndex];
this.subSamplingDivisors = component.SubSamplingDivisors;
this.SourceBlock = default;

43
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs

@ -96,6 +96,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
private Block8x8F temporalBlock1;
private Block8x8F temporalBlock2;
private Block8x8 temporalShortBlock;
/// <summary>
/// The output stream. All attempted writes after the first error become no-ops.
@ -132,8 +133,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
var unzig = ZigZag.CreateUnzigTable();
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
@ -156,22 +155,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.Y,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
prevDCCb = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCb,
ref pixelConverter.Cb,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
prevDCCr = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCr,
ref pixelConverter.Cr,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
if (this.IsFlushNeeded)
{
@ -197,8 +193,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
var unzig = ZigZag.CreateUnzigTable();
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
@ -222,30 +216,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.YLeft,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
prevDCY = this.WriteBlock(
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.YRight,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
}
prevDCCb = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCb,
ref pixelConverter.Cb,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
prevDCCr = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCr,
ref pixelConverter.Cr,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
if (this.IsFlushNeeded)
{
@ -269,8 +259,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
var unzig = ZigZag.CreateUnzigTable();
// ReSharper disable once InconsistentNaming
int prevDCY = 0;
@ -292,8 +280,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.Y,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
if (this.IsFlushNeeded)
{
@ -320,28 +307,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex index,
int prevDC,
ref Block8x8F src,
ref Block8x8F quant,
ref ZigZag unZig)
ref Block8x8F quant)
{
ref Block8x8F refTemp1 = ref this.temporalBlock1;
ref Block8x8F refTemp2 = ref this.temporalBlock2;
ref Block8x8 spectralBlock = ref this.temporalShortBlock;
FastFloatingPointDCT.TransformFDCT(ref src, ref refTemp1, ref refTemp2);
Block8x8F.Quantize(ref refTemp1, ref refTemp2, ref quant, ref unZig);
Block8x8F.Quantize(ref refTemp1, ref spectralBlock, ref quant);
// Emit the DC delta.
int dc = (int)refTemp2[0];
int dc = spectralBlock[0];
this.EmitHuffRLE(this.huffmanTables[2 * (int)index].Values, 0, dc - prevDC);
// Emit the AC components.
int[] acHuffTable = this.huffmanTables[(2 * (int)index) + 1].Values;
int runLength = 0;
int lastValuableIndex = refTemp2.GetLastNonZeroIndex();
int lastValuableIndex = spectralBlock.GetLastNonZeroIndex();
for (int zig = 1; zig <= lastValuableIndex; zig++)
{
int ac = (int)refTemp2[zig];
int ac = spectralBlock[zig];
if (ac == 0)
{

67
src/ImageSharp/Formats/Jpeg/Components/Quantization.cs

@ -39,53 +39,59 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public const int QualityEstimationConfidenceUpperThreshold = 98;
/// <summary>
/// Gets the unscaled luminance quantization table in zig-zag order. Each
/// encoder copies and scales the tables according to its quality parameter.
/// The values are derived from ITU section K.1 after converting from natural to
/// zig-zag order.
/// Gets unscaled luminance quantization table.
/// </summary>
/// <remarks>
/// The values are derived from ITU section K.1.
/// </remarks>
// The C# compiler emits this as a compile-time constant embedded in the PE file.
// This is effectively compiled down to: return new ReadOnlySpan<byte>(&data, length)
// More details can be found: https://github.com/dotnet/roslyn/pull/24621
public static ReadOnlySpan<byte> UnscaledQuant_Luminance => new byte[]
public static ReadOnlySpan<byte> LuminanceTable => new byte[]
{
16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24,
40, 26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60,
57, 51, 56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80,
109, 81, 87, 95, 98, 103, 104, 103, 62, 77, 113, 121, 112,
100, 120, 92, 101, 103, 99,
16, 11, 10, 16, 24, 40, 51, 61,
12, 12, 14, 19, 26, 58, 60, 55,
14, 13, 16, 24, 40, 57, 69, 56,
14, 17, 22, 29, 51, 87, 80, 62,
18, 22, 37, 56, 68, 109, 103, 77,
24, 35, 55, 64, 81, 104, 113, 92,
49, 64, 78, 87, 103, 121, 120, 101,
72, 92, 95, 98, 112, 100, 103, 99,
};
/// <summary>
/// Gets the unscaled chrominance quantization table in zig-zag order. Each
/// encoder copies and scales the tables according to its quality parameter.
/// The values are derived from ITU section K.1 after converting from natural to
/// zig-zag order.
/// Gets unscaled chrominance quantization table.
/// </summary>
/// <remarks>
/// The values are derived from ITU section K.1.
/// </remarks>
// The C# compiler emits this as a compile-time constant embedded in the PE file.
// This is effectively compiled down to: return new ReadOnlySpan<byte>(&data, length)
// More details can be found: https://github.com/dotnet/roslyn/pull/24621
public static ReadOnlySpan<byte> UnscaledQuant_Chrominance => new byte[]
public static ReadOnlySpan<byte> ChrominanceTable => new byte[]
{
17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
17, 18, 24, 47, 99, 99, 99, 99,
18, 21, 26, 66, 99, 99, 99, 99,
24, 26, 56, 99, 99, 99, 99, 99,
47, 66, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
};
/// Ported from JPEGsnoop:
/// https://github.com/ImpulseAdventure/JPEGsnoop/blob/9732ee0961f100eb69bbff4a0c47438d5997abee/source/JfifDecode.cpp#L4570-L4694
/// <summary>
/// Estimates jpeg quality based on quantization table in zig-zag order.
/// Estimates jpeg quality based on standard quantization table.
/// </summary>
/// <remarks>
/// This technically can be used with any given table but internal decoder code uses ITU spec tables:
/// <see cref="UnscaledQuant_Luminance"/> and <see cref="UnscaledQuant_Chrominance"/>.
/// Technically, this can be used with any given table but internal decoder code uses ITU spec tables:
/// <see cref="LuminanceTable"/> and <see cref="ChrominanceTable"/>.
/// </remarks>
/// <param name="table">Input quantization table.</param>
/// <param name="target">Quantization to estimate against.</param>
/// <returns>Estimated quality</returns>
/// <param name="target">Natural order quantization table to estimate against.</param>
/// <returns>Estimated quality.</returns>
public static int EstimateQuality(ref Block8x8F table, ReadOnlySpan<byte> target)
{
// This method can be SIMD'ified if standard table is injected as Block8x8F.
@ -106,11 +112,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
int quality;
for (int i = 0; i < Block8x8F.Size; i++)
{
float coeff = table[i];
int coeffInteger = (int)coeff;
int coeff = (int)table[i];
// Coefficients are actually int16 casted to float numbers so there's no truncating error.
if (coeffInteger != 0)
if (coeff != 0)
{
comparePercent = 100.0 * (table[i] / target[i]);
}
@ -152,7 +157,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <returns>Estimated quality</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int EstimateLuminanceQuality(ref Block8x8F luminanceTable)
=> EstimateQuality(ref luminanceTable, UnscaledQuant_Luminance);
=> EstimateQuality(ref luminanceTable, LuminanceTable);
/// <summary>
/// Estimates jpeg quality based on chrominance quantization table in natural order.
@ -161,7 +166,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <returns>Estimated quality</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int EstimateChrominanceQuality(ref Block8x8F chrominanceTable)
=> EstimateQuality(ref chrominanceTable, UnscaledQuant_Chrominance);
=> EstimateQuality(ref chrominanceTable, ChrominanceTable);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int QualityToScale(int quality)
@ -185,10 +190,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Block8x8F ScaleLuminanceTable(int quality)
=> ScaleQuantizationTable(scale: QualityToScale(quality), UnscaledQuant_Luminance);
=> ScaleQuantizationTable(scale: QualityToScale(quality), LuminanceTable);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Block8x8F ScaleChrominanceTable(int quality)
=> ScaleQuantizationTable(scale: QualityToScale(quality), UnscaledQuant_Chrominance);
=> ScaleQuantizationTable(scale: QualityToScale(quality), ChrominanceTable);
}
}

404
src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs

@ -0,0 +1,404 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal static partial class ZigZag
{
/// <summary>
/// Special byte value to zero out elements during Sse/Avx shuffle intrinsics.
/// </summary>
private const byte Z = 0xff;
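/// <summary>
/// Gets the shuffle masks for the <see cref="ApplyZigZagOrderingSse"/> zig-zag implementation:
/// 35 consecutive masks of 16 bytes each. A mask labelled <c>N_X</c> picks, from source row
/// <c>X</c> (A..H are rows 0..7), the 16-bit elements that land in destination row <c>N</c>;
/// every other byte is <see cref="Z"/> so the partial rows can be OR-ed together.
/// </summary>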
private static ReadOnlySpan<byte> SseShuffleMasks => new byte[]
{
// 0_A
0, 1, 2, 3, Z, Z, Z, Z, Z, Z, 4, 5, 6, 7, Z, Z,
// 0_B
Z, Z, Z, Z, 0, 1, Z, Z, 2, 3, Z, Z, Z, Z, 4, 5,
// 0_C
Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, Z, Z, Z, Z, Z, Z,
// 1_A
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 8, 9, 10, 11,
// 1_B
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7, Z, Z, Z, Z,
// 1_C
2, 3, Z, Z, Z, Z, Z, Z, 4, 5, Z, Z, Z, Z, Z, Z,
// 1_D
Z, Z, 0, 1, Z, Z, 2, 3, Z, Z, Z, Z, Z, Z, Z, Z,
// 1_E
Z, Z, Z, Z, 0, 1, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 2_B
8, 9, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 2_C
Z, Z, 6, 7, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 2_D
Z, Z, Z, Z, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 2_E
Z, Z, Z, Z, Z, Z, 2, 3, Z, Z, Z, Z, Z, Z, 4, 5,
// 2_F
Z, Z, Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, 2, 3, Z, Z,
// 2_G
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, Z, Z,
// 3_A
Z, Z, Z, Z, Z, Z, 12, 13, 14, 15, Z, Z, Z, Z, Z, Z,
// 3_B
Z, Z, Z, Z, 10, 11, Z, Z, Z, Z, 12, 13, Z, Z, Z, Z,
// 3_C
Z, Z, 8, 9, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, Z, Z,
// 3_D/4_E
6, 7, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 8, 9,
// 4_F
Z, Z, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7, Z, Z,
// 4_G
Z, Z, Z, Z, 2, 3, Z, Z, Z, Z, 4, 5, Z, Z, Z, Z,
// 4_H
Z, Z, Z, Z, Z, Z, 0, 1, 2, 3, Z, Z, Z, Z, Z, Z,
// 5_B
Z, Z, Z, Z, 14, 15, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 5_C
Z, Z, 12, 13, Z, Z, 14, 15, Z, Z, Z, Z, Z, Z, Z, Z,
// 5_D
10, 11, Z, Z, Z, Z, Z, Z, 12, 13, Z, Z, Z, Z, Z, Z,
// 5_E
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, Z, Z, Z, Z,
// 5_F
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 8, 9, Z, Z,
// 5_G
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7,
// 6_D
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 14, 15, Z, Z, Z, Z,
// 6_E
Z, Z, Z, Z, Z, Z, Z, Z, 12, 13, Z, Z, 14, 15, Z, Z,
// 6_F
Z, Z, Z, Z, Z, Z, 10, 11, Z, Z, Z, Z, Z, Z, 12, 13,
// 6_G
Z, Z, Z, Z, 8, 9, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 6_H
4, 5, 6, 7, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 7_F
Z, Z, Z, Z, Z, Z, Z, Z, 14, 15, Z, Z, Z, Z, Z, Z,
// 7_G
10, 11, Z, Z, Z, Z, 12, 13, Z, Z, 14, 15, Z, Z, Z, Z,
// 7_H
Z, Z, 8, 9, 10, 11, Z, Z, Z, Z, Z, Z, 12, 13, 14, 15
};
/// <summary>
/// Gets shuffle vectors for <see cref="ApplyZigZagOrderingAvx"/>
/// zig zag implementation.
/// </summary>
private static ReadOnlySpan<byte> AvxShuffleMasks => new byte[]
{
// 01_AB/01_EF/23_CD - cross-lane
0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0,
// 01_AB - inner-lane
0, 1, 2, 3, 8, 9, Z, Z, 10, 11, 4, 5, 6, 7, 12, 13, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, 4, 5, 6, 7,
// 01_CD/23_GH - cross-lane
0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, Z, Z, Z, Z, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, Z, Z, Z, Z,
// 01_CD - inner-lane
Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, Z, Z, Z, Z, Z, Z, 2, 3, 8, 9, Z, Z, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z,
// 01_EF - inner-lane
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 23_AB/45_CD/67_EF - cross-lane
3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, Z, Z, Z, Z, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, Z, Z, Z, Z,
// 23_AB - inner-lane
4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7, 0, 1, 2, 3, 8, 9, Z, Z, Z, Z,
// 23_CD - inner-lane
Z, Z, 6, 7, 12, 13, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7, 12, 13,
// 23_EF - inner-lane
Z, Z, Z, Z, Z, Z, 2, 3, 8, 9, Z, Z, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 23_GH - inner-lane
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 45_AB - inner-lane
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 45_CD - inner-lane
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7, 0, 1, Z, Z, 2, 3, 8, 9, Z, Z, Z, Z, Z, Z,
// 45_EF - cross-lane
1, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, Z, Z, Z, Z, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0,
// 45_EF - inner-lane
2, 3, 8, 9, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 2, 3, 8, 9, Z, Z,
// 45_GH - inner-lane
Z, Z, Z, Z, 2, 3, 8, 9, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7,
// 67_CD - inner-lane
Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
// 67_EF - inner-lane
Z, Z, Z, Z, Z, Z, 6, 7, 0, 1, Z, Z, 2, 3, 8, 9, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, Z, Z, Z, Z, Z, Z,
// 67_GH - inner-lane
8, 9, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 2, 3, 8, 9, 10, 11, 4, 5, Z, Z, 6, 7, 12, 13, 14, 15
};
/// <summary>
/// Applies zig zag ordering for given 8x8 matrix using SSE cpu intrinsics.
/// </summary>
/// <remarks>
/// Requires Ssse3 support.
/// Every destination row is assembled by shuffling the contributing source rows with the
/// matching 16-byte mask from <see cref="SseShuffleMasks"/> and OR-ing the partial results.
/// </remarks>
/// <param name="source">Input matrix.</param>
/// <param name="dest">Matrix to store the result. Can be a reference to input matrix.</param>
public static unsafe void ApplyZigZagOrderingSse(ref Block8x8 source, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Ssse3.IsSupported, "Ssse3 support is required to run this operation!");

    fixed (byte* maskPtr = SseShuffleMasks)
    {
        // All source rows are read before anything is written to dest,
        // which is what makes in-place operation (source == dest) safe.
        Vector128<byte> A = source.V0.AsByte();
        Vector128<byte> B = source.V1.AsByte();
        Vector128<byte> C = source.V2.AsByte();
        Vector128<byte> D = source.V3.AsByte();
        Vector128<byte> E = source.V4.AsByte();
        Vector128<byte> F = source.V5.AsByte();
        Vector128<byte> G = source.V6.AsByte();
        Vector128<byte> H = source.V7.AsByte();

        // row0 - masks 0..2
        Vector128<short> row0_A = Ssse3.Shuffle(A, Sse2.LoadVector128(maskPtr + (0 * 16))).AsInt16();
        Vector128<short> row0_B = Ssse3.Shuffle(B, Sse2.LoadVector128(maskPtr + (1 * 16))).AsInt16();
        Vector128<short> row0 = Sse2.Or(row0_A, row0_B);
        Vector128<short> row0_C = Ssse3.Shuffle(C, Sse2.LoadVector128(maskPtr + (2 * 16))).AsInt16();
        row0 = Sse2.Or(row0, row0_C);

        // row1 - masks 3..7
        Vector128<short> row1_A = Ssse3.Shuffle(A, Sse2.LoadVector128(maskPtr + (3 * 16))).AsInt16();
        Vector128<short> row1_B = Ssse3.Shuffle(B, Sse2.LoadVector128(maskPtr + (4 * 16))).AsInt16();
        Vector128<short> row1 = Sse2.Or(row1_A, row1_B);
        Vector128<short> row1_C = Ssse3.Shuffle(C, Sse2.LoadVector128(maskPtr + (5 * 16))).AsInt16();
        row1 = Sse2.Or(row1, row1_C);
        Vector128<short> row1_D = Ssse3.Shuffle(D, Sse2.LoadVector128(maskPtr + (6 * 16))).AsInt16();
        row1 = Sse2.Or(row1, row1_D);
        Vector128<short> row1_E = Ssse3.Shuffle(E, Sse2.LoadVector128(maskPtr + (7 * 16))).AsInt16();
        row1 = Sse2.Or(row1, row1_E);

        // row2 - masks 8..13
        Vector128<short> row2_B = Ssse3.Shuffle(B, Sse2.LoadVector128(maskPtr + (8 * 16))).AsInt16();
        Vector128<short> row2_C = Ssse3.Shuffle(C, Sse2.LoadVector128(maskPtr + (9 * 16))).AsInt16();
        Vector128<short> row2 = Sse2.Or(row2_B, row2_C);
        Vector128<short> row2_D = Ssse3.Shuffle(D, Sse2.LoadVector128(maskPtr + (10 * 16))).AsInt16();
        row2 = Sse2.Or(row2, row2_D);
        Vector128<short> row2_E = Ssse3.Shuffle(E, Sse2.LoadVector128(maskPtr + (11 * 16))).AsInt16();
        row2 = Sse2.Or(row2, row2_E);
        Vector128<short> row2_F = Ssse3.Shuffle(F, Sse2.LoadVector128(maskPtr + (12 * 16))).AsInt16();
        row2 = Sse2.Or(row2, row2_F);
        Vector128<short> row2_G = Ssse3.Shuffle(G, Sse2.LoadVector128(maskPtr + (13 * 16))).AsInt16();
        row2 = Sse2.Or(row2, row2_G);

        // row3 - masks 14..17
        Vector128<short> row3_A = Ssse3.Shuffle(A, Sse2.LoadVector128(maskPtr + (14 * 16))).AsInt16();
        Vector128<short> row3_B = Ssse3.Shuffle(B, Sse2.LoadVector128(maskPtr + (15 * 16))).AsInt16();
        Vector128<short> row3 = Sse2.Or(row3_A, row3_B);
        Vector128<short> row3_C = Ssse3.Shuffle(C, Sse2.LoadVector128(maskPtr + (16 * 16))).AsInt16();
        row3 = Sse2.Or(row3, row3_C);

        // Mask 17 is shared: the same element pattern is taken from D for row3 and from E for row4.
        Vector128<byte> D3_E4_shuffleMask = Sse2.LoadVector128(maskPtr + (17 * 16));
        Vector128<short> row3_D = Ssse3.Shuffle(D, D3_E4_shuffleMask).AsInt16();
        row3 = Sse2.Or(row3, row3_D);

        // row4 - masks 17..20
        Vector128<short> row4_E = Ssse3.Shuffle(E, D3_E4_shuffleMask).AsInt16();
        Vector128<short> row4_F = Ssse3.Shuffle(F, Sse2.LoadVector128(maskPtr + (18 * 16))).AsInt16();
        Vector128<short> row4 = Sse2.Or(row4_E, row4_F);
        Vector128<short> row4_G = Ssse3.Shuffle(G, Sse2.LoadVector128(maskPtr + (19 * 16))).AsInt16();
        row4 = Sse2.Or(row4, row4_G);
        Vector128<short> row4_H = Ssse3.Shuffle(H, Sse2.LoadVector128(maskPtr + (20 * 16))).AsInt16();
        row4 = Sse2.Or(row4, row4_H);

        // row5 - masks 21..26
        Vector128<short> row5_B = Ssse3.Shuffle(B, Sse2.LoadVector128(maskPtr + (21 * 16))).AsInt16();
        Vector128<short> row5_C = Ssse3.Shuffle(C, Sse2.LoadVector128(maskPtr + (22 * 16))).AsInt16();
        Vector128<short> row5 = Sse2.Or(row5_B, row5_C);
        Vector128<short> row5_D = Ssse3.Shuffle(D, Sse2.LoadVector128(maskPtr + (23 * 16))).AsInt16();
        row5 = Sse2.Or(row5, row5_D);
        Vector128<short> row5_E = Ssse3.Shuffle(E, Sse2.LoadVector128(maskPtr + (24 * 16))).AsInt16();
        row5 = Sse2.Or(row5, row5_E);
        Vector128<short> row5_F = Ssse3.Shuffle(F, Sse2.LoadVector128(maskPtr + (25 * 16))).AsInt16();
        row5 = Sse2.Or(row5, row5_F);
        Vector128<short> row5_G = Ssse3.Shuffle(G, Sse2.LoadVector128(maskPtr + (26 * 16))).AsInt16();
        row5 = Sse2.Or(row5, row5_G);

        // row6 - masks 27..31
        Vector128<short> row6_D = Ssse3.Shuffle(D, Sse2.LoadVector128(maskPtr + (27 * 16))).AsInt16();
        Vector128<short> row6_E = Ssse3.Shuffle(E, Sse2.LoadVector128(maskPtr + (28 * 16))).AsInt16();
        Vector128<short> row6 = Sse2.Or(row6_D, row6_E);
        Vector128<short> row6_F = Ssse3.Shuffle(F, Sse2.LoadVector128(maskPtr + (29 * 16))).AsInt16();
        row6 = Sse2.Or(row6, row6_F);
        Vector128<short> row6_G = Ssse3.Shuffle(G, Sse2.LoadVector128(maskPtr + (30 * 16))).AsInt16();
        row6 = Sse2.Or(row6, row6_G);
        Vector128<short> row6_H = Ssse3.Shuffle(H, Sse2.LoadVector128(maskPtr + (31 * 16))).AsInt16();
        row6 = Sse2.Or(row6, row6_H);

        // row7 - masks 32..34
        Vector128<short> row7_F = Ssse3.Shuffle(F, Sse2.LoadVector128(maskPtr + (32 * 16))).AsInt16();
        Vector128<short> row7_G = Ssse3.Shuffle(G, Sse2.LoadVector128(maskPtr + (33 * 16))).AsInt16();
        Vector128<short> row7 = Sse2.Or(row7_F, row7_G);

        // 7_H is the last mask of the table (index 34); index 35 would read past its end.
        Vector128<short> row7_H = Ssse3.Shuffle(H, Sse2.LoadVector128(maskPtr + (34 * 16))).AsInt16();
        row7 = Sse2.Or(row7, row7_H);

        dest.V0 = row0;
        dest.V1 = row1;
        dest.V2 = row2;
        dest.V3 = row3;
        dest.V4 = row4;
        dest.V5 = row5;
        dest.V6 = row6;
        dest.V7 = row7;
    }
}
/// <summary>
/// Reorders the coefficients of an 8x8 matrix into zig-zag order using AVX2 cpu intrinsics.
/// </summary>
/// <remarks>
/// Requires Avx2 support.
/// All four source row pairs are read before anything is written to <paramref name="dest"/>,
/// which is why both references may point to the same block.
/// </remarks>
/// <param name="source">Input matrix.</param>
/// <param name="dest">Matrix to store the result. Can be a reference to input matrix.</param>
public static unsafe void ApplyZigZagOrderingAvx(ref Block8x8 source, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");
    fixed (byte* masks = AvxShuffleMasks)
    {
        // Operation count for the whole routine:
        //   18 mask loads, 10 cross-lane permutations, 14 byte shuffles,
        //   10 bitwise or's, 4 stores.
        //
        // Each 256-bit input holds two consecutive rows of 16-bit coefficients:
        // A0 A1 A2 A3 A4 A5 A6 A7 | B0 B1 B2 B3 B4 B5 B6 B7
        // C0 C1 C2 C3 C4 C5 C6 C7 | D0 D1 D2 D3 D4 D5 D6 D7
        // E0 E1 E2 E3 E4 E5 E6 E7 | F0 F1 F2 F3 F4 F5 F6 F7
        // G0 G1 G2 G3 G4 G5 G6 G7 | H0 H1 H2 H3 H4 H5 H6 H7
        // They are viewed as eight 32-bit elements because that is the granularity
        // at which PermuteVar8x32 moves data across the two 128-bit lanes.
        Vector256<int> pairsAB = source.V01.AsInt32();
        Vector256<int> pairsCD = source.V23.AsInt32();
        Vector256<int> pairsEF = source.V45.AsInt32();
        Vector256<int> pairsGH = source.V67.AsInt32();

        // Cross-lane permutation masks. Each one is reused for several
        // (source rows -> destination rows) combinations, as the names spell out.
        Vector256<int> permAB01_EF01_CD23 = Avx.LoadVector256(masks + (0 * 32)).AsInt32();
        Vector256<int> permCD01_GH23 = Avx.LoadVector256(masks + (2 * 32)).AsInt32();
        Vector256<int> permAB23_CD45_EF67 = Avx.LoadVector256(masks + (5 * 32)).AsInt32();
        Vector256<int> permEF45_GH67 = Avx.LoadVector256(masks + (12 * 32)).AsInt32();

        // Phase 1: bring every needed coefficient pair into the lane where it will end up.
        // A gathered vector whose name covers four rows feeds two destination row pairs.
        Vector256<byte> abFor01 = Avx2.PermuteVar8x32(pairsAB, permAB01_EF01_CD23).AsByte();
        Vector256<byte> cdFor01 = Avx2.PermuteVar8x32(pairsCD, permCD01_GH23).AsByte();
        Vector256<byte> efFor0123 = Avx2.PermuteVar8x32(pairsEF, permAB01_EF01_CD23).AsByte();
        Vector256<byte> abFor2345 = Avx2.PermuteVar8x32(pairsAB, permAB23_CD45_EF67).AsByte();
        Vector256<byte> cdFor23 = Avx2.PermuteVar8x32(pairsCD, permAB01_EF01_CD23).AsByte();
        Vector256<byte> ghFor2345 = Avx2.PermuteVar8x32(pairsGH, permCD01_GH23).AsByte();
        Vector256<byte> cdFor4567 = Avx2.PermuteVar8x32(pairsCD, permAB23_CD45_EF67).AsByte();
        Vector256<byte> efFor45 = Avx2.PermuteVar8x32(pairsEF, permEF45_GH67).AsByte();
        Vector256<byte> efFor67 = Avx2.PermuteVar8x32(pairsEF, permAB23_CD45_EF67).AsByte();
        Vector256<byte> ghFor67 = Avx2.PermuteVar8x32(pairsGH, permEF45_GH67).AsByte();

        // Phase 2: in-lane byte shuffles place each coefficient at its final slot, then the
        // partial vectors are merged with OR.
        // NOTE(review): the OR-merge presumes every shuffle mask zeroes the slots its source
        // does not own; the mask table is defined outside this method - confirm there.

        // rows 0,1 - A0 A1 B0 C0 B1 A2 A3 B2 | C1 D0 E0 D1 C2 B3 A4 A5
        Vector256<byte> zigzag01 = Avx2.Or(
            Avx2.Or(
                Avx2.Shuffle(abFor01, Avx.LoadVector256(masks + (1 * 32))),
                Avx2.Shuffle(cdFor01, Avx.LoadVector256(masks + (3 * 32)))),
            Avx2.Shuffle(efFor0123, Avx.LoadVector256(masks + (4 * 32))));

        // rows 2,3 - B4 C3 D2 E1 F0 G0 F1 E2 | D3 C4 B5 A6 A7 B6 C5 D4
        Vector256<byte> zigzag23 = Avx2.Or(
            Avx2.Or(
                Avx2.Shuffle(abFor2345, Avx.LoadVector256(masks + (6 * 32))),
                Avx2.Shuffle(cdFor23, Avx.LoadVector256(masks + (7 * 32)))),
            Avx2.Or(
                Avx2.Shuffle(efFor0123, Avx.LoadVector256(masks + (8 * 32))),
                Avx2.Shuffle(ghFor2345, Avx.LoadVector256(masks + (9 * 32)))));

        // rows 4,5 - E3 F2 G1 H0 H1 G2 F3 E4 | D5 C6 B7 C7 D6 E5 F4 G3
        Vector256<byte> zigzag45 = Avx2.Or(
            Avx2.Or(
                Avx2.Shuffle(abFor2345, Avx.LoadVector256(masks + (10 * 32))),
                Avx2.Shuffle(cdFor4567, Avx.LoadVector256(masks + (11 * 32)))),
            Avx2.Or(
                Avx2.Shuffle(efFor45, Avx.LoadVector256(masks + (13 * 32))),
                Avx2.Shuffle(ghFor2345, Avx.LoadVector256(masks + (14 * 32)))));

        // rows 6,7 - H2 H3 G4 F5 E6 D7 E7 F6 | G5 H4 H5 G6 F7 G7 H6 H7
        Vector256<byte> zigzag67 = Avx2.Or(
            Avx2.Or(
                Avx2.Shuffle(cdFor4567, Avx.LoadVector256(masks + (15 * 32))),
                Avx2.Shuffle(efFor67, Avx.LoadVector256(masks + (16 * 32)))),
            Avx2.Shuffle(ghFor67, Avx.LoadVector256(masks + (17 * 32))));

        // Stores happen only after all inputs were consumed, keeping in-place use valid.
        dest.V01 = zigzag01.AsInt16();
        dest.V23 = zigzag23.AsInt16();
        dest.V45 = zigzag45.AsInt16();
        dest.V67 = zigzag67.AsInt16();
    }
}
}
}
#endif

79
src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs

@ -4,19 +4,17 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
/// <summary>
/// Holds the Jpeg UnZig array in a value/stack type.
/// Unzig maps from the zigzag ordering to the natural ordering. For example,
/// unzig[3] is the column and row of the fourth element in zigzag order. The
/// value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal unsafe struct ZigZag
internal static partial class ZigZag
{
/// <summary>
/// Gets span of zig-zag ordering indices.
/// </summary>
/// <remarks>
/// When reading corrupted data, the Huffman decoders could attempt
/// to reference an entry beyond the end of this array (if the decoded
/// zero run length reaches past the end of the block). To prevent
@ -25,20 +23,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// to be stored in location 63 of the block, not somewhere random.
/// The worst case would be a run-length of 15, which means we need 16
/// fake entries.
/// </summary>
private const int Size = 64 + 16;
/// <summary>
/// Copy of <see cref="Unzig"/> in a value type
/// </summary>
public fixed byte Data[Size];
/// <summary>
/// Gets the unzigs map, which maps from the zigzag ordering to the natural ordering.
/// For example, unzig[3] is the column and row of the fourth element in zigzag order.
/// The value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
/// </summary>
private static ReadOnlySpan<byte> Unzig => new byte[]
/// </remarks>
public static ReadOnlySpan<byte> ZigZagOrder => new byte[]
{
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
@ -48,53 +34,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63,
63, 63, 63, 63, 63, 63, 63, 63, // Extra entries for safety in decoder
// Extra entries for safety in decoder
63, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63
};
/// <summary>
/// Returns the value at the given index
/// </summary>
/// <param name="idx">The index</param>
/// <returns>The <see cref="byte"/></returns>
public byte this[int idx]
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
ref byte self = ref Unsafe.As<ZigZag, byte>(ref this);
return Unsafe.Add(ref self, idx);
}
}
/// <summary>
/// Creates and fills an instance of <see cref="ZigZag"/> with Jpeg unzig indices
/// </summary>
/// <returns>The new instance</returns>
public static ZigZag CreateUnzigTable()
{
ZigZag result = default;
ref byte sourceRef = ref MemoryMarshal.GetReference(Unzig);
ref byte destinationRef = ref Unsafe.AsRef<byte>(result.Data);
Unzig.CopyTo(new Span<byte>(result.Data, Size));
return result;
}
/// <summary>
/// Apply Zigging to the given quantization table, so it will be sufficient to multiply blocks for dequantizing them.
/// </summary>
public static Block8x8F CreateDequantizationTable(ref Block8x8F qt)
{
Block8x8F result = default;
for (int i = 0; i < Block8x8F.Size; i++)
{
result[Unzig[i]] = qt[i];
}
return result;
}
}
}

6
src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs

@ -740,9 +740,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
stream.Read(this.temp, 0, 64);
remaining -= 64;
// Parsing quantization table & saving it in natural order
for (int j = 0; j < 64; j++)
{
table[j] = this.temp[j];
table[ZigZag.ZigZagOrder[j]] = this.temp[j];
}
break;
@ -760,9 +761,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
stream.Read(this.temp, 0, 128);
remaining -= 128;
// Parsing quantization table & saving it in natural order
for (int j = 0; j < 64; j++)
{
table[j] = (this.temp[2 * j] << 8) | this.temp[(2 * j) + 1];
table[ZigZag.ZigZagOrder[j]] = (this.temp[2 * j] << 8) | this.temp[(2 * j) + 1];
}
break;

12
src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs

@ -151,7 +151,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
dqt[offset++] = (byte)i;
for (int j = 0; j < Block8x8F.Size; j++)
{
dqt[offset++] = (byte)quant[j];
dqt[offset++] = (byte)quant[ZigZag.ZigZagOrder[j]];
}
}
@ -635,11 +635,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
/// Initializes quantization tables.
/// </summary>
/// <remarks>
/// <para>
/// Zig-zag ordering is NOT applied to the resulting tables.
/// </para>
/// <para>
/// We take quality values in a hierarchical order:
/// 1. Check if encoder has set quality
/// 2. Check if metadata has special table for encoding
/// 3. Check if metadata has set quality
/// 4. Take default quality value - 75
/// 2. Check if metadata has set quality
/// 3. Take default quality value - 75
/// </para>
/// </remarks>
/// <param name="componentCount">Color components count.</param>
/// <param name="metadata">Jpeg metadata instance.</param>

74
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -272,32 +272,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
this.CompareBlocks(expected, actual, 0);
}
// TODO: intrinsic tests
[Theory]
[InlineData(1)]
[InlineData(2)]
public unsafe void Quantize(int seed)
[InlineData(1, 2)]
[InlineData(2, 1)]
public void Quantize(int srcSeed, int qtSeed)
{
var block = default(Block8x8F);
block.LoadFrom(Create8x8RoundedRandomFloatData(-2000, 2000, seed));
var qt = default(Block8x8F);
qt.LoadFrom(Create8x8RoundedRandomFloatData(-2000, 2000, seed));
var unzig = ZigZag.CreateUnzigTable();
Block8x8F source = CreateRandomFloatBlock(-2000, 2000, srcSeed);
Block8x8F quant = CreateRandomFloatBlock(-2000, 2000, qtSeed);
int* expectedResults = stackalloc int[Block8x8F.Size];
ReferenceImplementations.QuantizeRational(&block, expectedResults, &qt, unzig.Data);
Block8x8 expected = default;
ReferenceImplementations.Quantize(ref source, ref expected, ref quant, ZigZag.ZigZagOrder);
var actualResults = default(Block8x8F);
Block8x8 actual = default;
Block8x8F.Quantize(ref source, ref actual, ref quant);
Block8x8F.Quantize(ref block, ref actualResults, ref qt, ref unzig);
for (int i = 0; i < Block8x8F.Size; i++)
for (int i = 0; i < Block8x8.Size; i++)
{
int expected = expectedResults[i];
int actual = (int)actualResults[i];
Assert.Equal(expected, actual);
Assert.Equal(expected[i], actual[i]);
}
}
@ -368,48 +360,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX);
}
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public unsafe void DequantizeBlock(int seed)
{
Block8x8F original = CreateRandomFloatBlock(-500, 500, seed);
Block8x8F qt = CreateRandomFloatBlock(0, 10, seed + 42);
var unzig = ZigZag.CreateUnzigTable();
Block8x8F expected = original;
Block8x8F actual = original;
ReferenceImplementations.DequantizeBlock(&expected, &qt, unzig.Data);
Block8x8F.DequantizeBlock(&actual, &qt, unzig.Data);
this.CompareBlocks(expected, actual, 0);
}
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public unsafe void ZigZag_CreateDequantizationTable_MultiplicationShouldQuantize(int seed)
{
Block8x8F original = CreateRandomFloatBlock(-500, 500, seed);
Block8x8F qt = CreateRandomFloatBlock(0, 10, seed + 42);
var unzig = ZigZag.CreateUnzigTable();
Block8x8F zigQt = ZigZag.CreateDequantizationTable(ref qt);
Block8x8F expected = original;
Block8x8F actual = original;
ReferenceImplementations.DequantizeBlock(&expected, &qt, unzig.Data);
actual.MultiplyInPlace(ref zigQt);
this.CompareBlocks(expected, actual, 0);
}
[Fact]
public void AddToAllInPlace()
{

8
tests/ImageSharp.Tests/Formats/Jpg/QuantizationTests.cs

@ -21,7 +21,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Block8x8F table = JpegQuantization.ScaleLuminanceTable(quality);
int estimatedQuality = JpegQuantization.EstimateLuminanceQuality(ref table);
Assert.True(quality.Equals(estimatedQuality), $"Failed to estimate luminance quality for standard table at quality level {quality}");
Assert.True(
quality.Equals(estimatedQuality),
$"Failed to estimate luminance quality for standard table at quality level {quality}");
}
}
@ -35,7 +37,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Block8x8F table = JpegQuantization.ScaleChrominanceTable(quality);
int estimatedQuality = JpegQuantization.EstimateChrominanceQuality(ref table);
Assert.True(quality.Equals(estimatedQuality), $"Failed to estimate chrominance quality for standard table at quality level {quality}");
Assert.True(
quality.Equals(estimatedQuality),
$"Failed to estimate chrominance quality for standard table at quality level {quality}");
}
}
}

54
tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs

@ -15,18 +15,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
/// </summary>
internal static partial class ReferenceImplementations
{
public static unsafe void DequantizeBlock(Block8x8F* blockPtr, Block8x8F* qtPtr, byte* unzigPtr)
public static void DequantizeBlock(ref Block8x8F block, ref Block8x8F qt, ReadOnlySpan<byte> zigzag)
{
float* b = (float*)blockPtr;
float* qtp = (float*)qtPtr;
for (int qtIndex = 0; qtIndex < Block8x8F.Size; qtIndex++)
for (int i = 0; i < Block8x8F.Size; i++)
{
byte i = unzigPtr[qtIndex];
float* unzigPos = b + i;
float val = *unzigPos;
val *= qtp[qtIndex];
*unzigPos = val;
int zig = zigzag[i];
block[zig] *= qt[i];
}
}
@ -101,42 +95,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
/// <summary>
/// Reference implementation to test <see cref="Block8x8F.Quantize"/>.
/// Rounding is done using an integer-based algorithm defined in <see cref="RationalRound(int,int)"/>.
/// </summary>
/// <param name="src">The input block</param>
/// <param name="dest">The destination block of integers</param>
/// <param name="qt">The quantization table</param>
/// <param name="unzigPtr">Pointer to <see cref="ZigZag.Data"/> </param>
public static unsafe void QuantizeRational(Block8x8F* src, int* dest, Block8x8F* qt, byte* unzigPtr)
/// <param name="src">The input block.</param>
/// <param name="dest">The destination block of 16bit integers.</param>
/// <param name="qt">The quantization table.</param>
/// <param name="zigzag">Zig-Zag index sequence span.</param>
public static void Quantize(ref Block8x8F src, ref Block8x8 dest, ref Block8x8F qt, ReadOnlySpan<byte> zigzag)
{
float* s = (float*)src;
float* q = (float*)qt;
for (int zig = 0; zig < Block8x8F.Size; zig++)
for (int i = 0; i < Block8x8F.Size; i++)
{
int a = (int)s[unzigPtr[zig]];
int b = (int)q[zig];
int val = RationalRound(a, b);
dest[zig] = val;
int zig = zigzag[i];
dest[i] = (short)Math.Round(src[zig] / qt[zig], MidpointRounding.AwayFromZero);
}
}
/// <summary>
/// Rounds a rational number defined as dividend/divisor into an integer.
/// </summary>
/// <param name="dividend">The dividend.</param>
/// <param name="divisor">The divisor.</param>
/// <returns>The rounded value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int RationalRound(int dividend, int divisor)
{
if (dividend >= 0)
{
return (dividend + (divisor >> 1)) / divisor;
}
return -((-dividend + (divisor >> 1)) / divisor);
}
}
}

5
tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs

@ -13,8 +13,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
public void ZigZagCanHandleAllPossibleCoefficients()
{
// Mimic the behaviour of the huffman scan decoder using all possible byte values
var block = new short[64];
var zigzag = ZigZag.CreateUnzigTable();
short[] block = new short[64];
for (int h = 0; h < 255; h++)
{
@ -27,7 +26,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
if (s != 0)
{
i += r;
block[zigzag[i++]] = (short)s;
block[ZigZag.ZigZagOrder[i++]] = (short)s;
}
else
{

Loading…
Cancel
Save