diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs
index 3e5277c063..c76eb942fd 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs
@@ -12,7 +12,7 @@ using System.Text;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
///
- /// 8x8 coefficients matrix of type.
+ /// 8x8 matrix of coefficients.
///
// ReSharper disable once InconsistentNaming
[StructLayout(LayoutKind.Explicit)]
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs
new file mode 100644
index 0000000000..073580d40c
--- /dev/null
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs
@@ -0,0 +1,87 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components
+{
+ internal partial struct Block8x8F
+ {
+ ///
+ /// The number of rows, of 8 scalar coefficients each, in a <see cref="Block8x8F"/>.
+ ///
+ public const int RowCount = 8;
+
+ [FieldOffset(0)]
+ public Vector256 V0;
+ [FieldOffset(32)]
+ public Vector256 V1;
+ [FieldOffset(64)]
+ public Vector256 V2;
+ [FieldOffset(96)]
+ public Vector256 V3;
+ [FieldOffset(128)]
+ public Vector256 V4;
+ [FieldOffset(160)]
+ public Vector256 V5;
+ [FieldOffset(192)]
+ public Vector256 V6;
+ [FieldOffset(224)]
+ public Vector256 V7;
+
+ private static ReadOnlySpan DivideIntoInt16_Avx2_ShuffleMask => new int[] {
+ 0, 1, 4, 5, 2, 3, 6, 7
+ };
+
+ private static unsafe void DivideIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest) // dest[i] = a[i] / b[i], converted to int16 with saturation; element order is preserved
+ {
+ DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");
+
+ fixed (int* maskPtr = DivideIntoInt16_Avx2_ShuffleMask)
+ {
+ Vector256 crossLaneShuffleMask = Avx.LoadVector256(maskPtr).AsInt32(); // 32-bit element permutation 0,1,4,5,2,3,6,7 applied after the pack below
+
+ ref Vector256 aBase = ref Unsafe.As>(ref a);
+ ref Vector256 bBase = ref Unsafe.As>(ref b);
+
+ ref Vector256 destBase = ref Unsafe.As>(ref dest);
+
+ for (int i = 0; i < 8; i += 2) // two 8-float rows per iteration; quotients go through the non-truncating float->int32 conversion (rounds per the current hardware rounding mode)
+ {
+ Vector256 row0 = Avx.ConvertToVector256Int32(Avx.Divide(Unsafe.Add(ref aBase, i + 0), Unsafe.Add(ref bBase, i + 0)));
+ Vector256 row1 = Avx.ConvertToVector256Int32(Avx.Divide(Unsafe.Add(ref aBase, i + 1), Unsafe.Add(ref bBase, i + 1)));
+
+ Vector256 row = Avx2.PackSignedSaturate(row0, row1); // packs inside each 128-bit lane: [row0.lo, row1.lo | row0.hi, row1.hi]
+ row = Avx2.PermuteVar8x32(row.AsInt32(), crossLaneShuffleMask).AsInt16(); // swaps the middle 64-bit chunks: [row0.lo, row0.hi | row1.lo, row1.hi]
+
+ Unsafe.Add(ref destBase, i / 2) = row; // one 256-bit store covers two consecutive 8-element int16 rows of dest
+ }
+ }
+ }
+
+ private static void DivideIntoInt16_Sse2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest) // 128-bit variant: dest[i] = a[i] / b[i], converted to int16 with saturation; element order is preserved
+ {
+ DebugGuard.IsTrue(Sse2.IsSupported, "Sse2 support is required to run this operation!");
+
+ ref Vector128 aBase = ref Unsafe.As>(ref a);
+ ref Vector128 bBase = ref Unsafe.As>(ref b);
+
+ ref Vector128 destBase = ref Unsafe.As>(ref dest);
+
+ for (int i = 0; i < 16; i += 2) // the block is walked as 16 four-float vectors; each iteration handles the left and right half of one row
+ {
+ Vector128 left = Sse2.ConvertToVector128Int32(Sse.Divide(Unsafe.Add(ref aBase, i + 0), Unsafe.Add(ref bBase, i + 0)));
+ Vector128 right = Sse2.ConvertToVector128Int32(Sse.Divide(Unsafe.Add(ref aBase, i + 1), Unsafe.Add(ref bBase, i + 1)));
+
+ Vector128 row = Sse2.PackSignedSaturate(left, right); // 128-bit pack yields [left(4), right(4)] already in order, so no shuffle is needed
+ Unsafe.Add(ref destBase, i / 2) = row; // stores one complete 8-element int16 row
+ }
+ }
+ }
+}
+#endif
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs
index 23cf4ce4a9..498fe4d03b 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs
@@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Numerics;
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
index 8479cdc970..79a35e2cd6 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
@@ -16,7 +16,7 @@ using System.Text;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
///
- /// 8x8 coefficients matrix of type.
+ /// 8x8 matrix of coefficients.
///
[StructLayout(LayoutKind.Explicit)]
internal partial struct Block8x8F : IEquatable
@@ -66,30 +66,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public Vector4 V7L;
[FieldOffset(240)]
public Vector4 V7R;
-
-#if SUPPORTS_RUNTIME_INTRINSICS
- ///
- /// A number of rows of 8 scalar coefficients each in
- ///
- public const int RowCount = 8;
-
- [FieldOffset(0)]
- public Vector256 V0;
- [FieldOffset(32)]
- public Vector256 V1;
- [FieldOffset(64)]
- public Vector256 V2;
- [FieldOffset(96)]
- public Vector256 V3;
- [FieldOffset(128)]
- public Vector256 V4;
- [FieldOffset(160)]
- public Vector256 V5;
- [FieldOffset(192)]
- public Vector256 V6;
- [FieldOffset(224)]
- public Vector256 V7;
-#endif
#pragma warning restore SA1600 // ElementsMustBeDocumented
///
@@ -188,13 +164,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
return result;
}
- ///
- /// Fill the block with defaults (zeroes).
- ///
- [MethodImpl(InliningOptions.ShortMethod)]
- public void Clear()
- => this = default; // The cheapest way to do this in C#:
-
///
/// Load raw 32bit floating point data from source.
///
@@ -302,7 +271,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public float[] ToArray()
{
- var result = new float[Size];
+ float[] result = new float[Size];
this.ScaledCopyTo(result);
return result;
}
@@ -434,102 +403,37 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
///
- /// Quantize the block.
- ///
- /// The block pointer.
- /// The qt pointer.
- /// Unzig pointer
- public static unsafe void DequantizeBlock(Block8x8F* blockPtr, Block8x8F* qtPtr, byte* unzigPtr)
- {
- float* b = (float*)blockPtr;
- float* qtp = (float*)qtPtr;
- for (int qtIndex = 0; qtIndex < Size; qtIndex++)
- {
- byte blockIndex = unzigPtr[qtIndex];
- float* unzigPos = b + blockIndex;
-
- float val = *unzigPos;
- val *= qtp[qtIndex];
- *unzigPos = val;
- }
- }
-
- ///
- /// Quantize 'block' into 'dest' using the 'qt' quantization table:
- /// Unzig the elements of block into dest, while dividing them by elements of qt and "pre-rounding" the values.
- /// To finish the rounding it's enough to (int)-cast these values.
+ /// Quantize input block, apply zig-zag ordering and store result as 16bit integers.
///
- /// Source block
- /// Destination block
- /// The quantization table
- /// The 8x8 Unzig block.
- public static unsafe void Quantize(
- ref Block8x8F block,
- ref Block8x8F dest,
- ref Block8x8F qt,
- ref ZigZag unZig)
+ /// Source block.
+ /// Destination block.
+ /// The quantization table.
+ public static void Quantize(ref Block8x8F block, ref Block8x8 dest, ref Block8x8F qt) // block and qt are read in natural order; dest receives the rounded quotients in zig-zag order
{
- for (int zig = 0; zig < Size; zig++)
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported)
{
- dest[zig] = block[unZig[zig]];
+ DivideIntoInt16_Avx2(ref block, ref qt, ref dest);
+ ZigZag.ApplyZigZagOrderingAvx(ref dest, ref dest); // reorders in place: dest is passed as both source and destination
}
-
- DivideRoundAll(ref dest, ref qt);
- }
-
- [MethodImpl(InliningOptions.ShortMethod)]
- private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b)
- {
-#if SUPPORTS_RUNTIME_INTRINSICS
- if (Avx.IsSupported)
+ else if (Ssse3.IsSupported) // Ssse3 rather than Sse2 is tested because the zig-zag shuffle step asserts Ssse3 support
{
- var vnegOne = Vector256.Create(-1f);
- var vadd = Vector256.Create(.5F);
- var vone = Vector256.Create(1f);
-
- for (int i = 0; i < RowCount; i++)
- {
- ref Vector256 aRow = ref Unsafe.Add(ref a.V0, i);
- ref Vector256 bRow = ref Unsafe.Add(ref b.V0, i);
- Vector256 voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aRow), vone), vadd);
- aRow = Avx.Add(Avx.Divide(aRow, bRow), voff);
- }
+ DivideIntoInt16_Sse2(ref block, ref qt, ref dest);
+ ZigZag.ApplyZigZagOrderingSse(ref dest, ref dest); // reorders in place: dest is passed as both source and destination
}
else
#endif
{
- a.V0L = DivideRound(a.V0L, b.V0L);
- a.V0R = DivideRound(a.V0R, b.V0R);
- a.V1L = DivideRound(a.V1L, b.V1L);
- a.V1R = DivideRound(a.V1R, b.V1R);
- a.V2L = DivideRound(a.V2L, b.V2L);
- a.V2R = DivideRound(a.V2R, b.V2R);
- a.V3L = DivideRound(a.V3L, b.V3L);
- a.V3R = DivideRound(a.V3R, b.V3R);
- a.V4L = DivideRound(a.V4L, b.V4L);
- a.V4R = DivideRound(a.V4R, b.V4R);
- a.V5L = DivideRound(a.V5L, b.V5L);
- a.V5R = DivideRound(a.V5R, b.V5R);
- a.V6L = DivideRound(a.V6L, b.V6L);
- a.V6R = DivideRound(a.V6R, b.V6R);
- a.V7L = DivideRound(a.V7L, b.V7L);
- a.V7R = DivideRound(a.V7R, b.V7R);
+ for (int i = 0; i < Size; i++)
+ {
+ // TODO: find a way to index block & qt matrices with natural order indices for performance?
+ int zig = ZigZag.ZigZagOrder[i];
+ float divRes = block[zig] / qt[zig];
+ dest[i] = (short)(divRes + (divRes > 0 ? 0.5f : -0.5f)); // rounds half away from zero, then casts. NOTE(review): the SIMD paths use the hardware float->int conversion plus a saturating pack - confirm results agree on .5 ties and on int16 overflow
+ }
}
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
- {
- var neg = new Vector4(-1);
- var add = new Vector4(.5F);
-
- // sign(dividend) = max(min(dividend, 1), -1)
- Vector4 sign = Numerics.Clamp(dividend, neg, Vector4.One);
-
- // AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend)
- return (dividend / divisor) + (sign * add);
- }
-
public void RoundInto(ref Block8x8 dest)
{
for (int i = 0; i < Size; i++)
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs
index 70a4465121..bbc4e40af3 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs
@@ -54,9 +54,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
///
private readonly HuffmanTable[] acHuffmanTables;
- // The unzig data.
- private ZigZag dctZigZag;
-
private HuffmanScanBuffer scanBuffer;
private readonly SpectralConverter spectralConverter;
@@ -74,7 +71,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
SpectralConverter converter,
CancellationToken cancellationToken)
{
- this.dctZigZag = ZigZag.CreateUnzigTable();
this.stream = stream;
this.spectralConverter = converter;
this.cancellationToken = cancellationToken;
@@ -477,7 +473,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
ref short blockDataRef = ref Unsafe.As(ref block);
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
- ref ZigZag zigzag = ref this.dctZigZag;
// DC
int t = buffer.DecodeHuffman(ref dcTable);
@@ -502,7 +497,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
i += r;
s = buffer.Receive(s);
- Unsafe.Add(ref blockDataRef, zigzag[i++]) = (short)s;
+ Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i++]) = (short)s;
}
else
{
@@ -556,7 +551,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
}
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
- ref ZigZag zigzag = ref this.dctZigZag;
int start = this.SpectralStart;
int end = this.SpectralEnd;
int low = this.SuccessiveLow;
@@ -572,7 +566,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if (s != 0)
{
s = buffer.Receive(s);
- Unsafe.Add(ref blockDataRef, zigzag[i]) = (short)(s << low);
+ Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i]) = (short)(s << low);
}
else
{
@@ -602,7 +596,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
// Refinement scan for these AC coefficients
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
- ref ZigZag zigzag = ref this.dctZigZag;
int start = this.SpectralStart;
int end = this.SpectralEnd;
@@ -649,7 +642,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
do
{
- ref short coef = ref Unsafe.Add(ref blockDataRef, zigzag[k]);
+ ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]);
if (coef != 0)
{
buffer.CheckBits();
@@ -675,7 +668,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if ((s != 0) && (k < 64))
{
- Unsafe.Add(ref blockDataRef, zigzag[k]) = (short)s;
+ Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]) = (short)s;
}
}
}
@@ -684,7 +677,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
for (; k <= end; k++)
{
- ref short coef = ref Unsafe.Add(ref blockDataRef, zigzag[k]);
+ ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]);
if (coef != 0)
{
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs
index 391dac784f..0b80acc5dc 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs
@@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
IJpegComponent[] Components { get; }
///
- /// Gets the quantization tables, in zigzag order.
+ /// Gets the quantization tables, in natural order.
///
Block8x8F[] QuantizationTables { get; }
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
index 7cfbaddcc1..00169d082b 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
@@ -46,7 +46,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
public JpegBlockPostProcessor(IRawJpegData decoder, IJpegComponent component)
{
int qtIndex = component.QuantizationTableIndex;
- this.DequantiazationTable = ZigZag.CreateDequantizationTable(ref decoder.QuantizationTables[qtIndex]);
+ this.DequantiazationTable = decoder.QuantizationTables[qtIndex];
this.subSamplingDivisors = component.SubSamplingDivisors;
this.SourceBlock = default;
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
index a6334e2da4..8b61b66c9e 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
@@ -96,6 +96,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
private Block8x8F temporalBlock1;
private Block8x8F temporalBlock2;
+ private Block8x8 temporalShortBlock;
///
/// The output stream. All attempted writes after the first error become no-ops.
@@ -132,8 +133,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
- var unzig = ZigZag.CreateUnzigTable();
-
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
@@ -156,22 +155,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.Y,
- ref luminanceQuantTable,
- ref unzig);
+ ref luminanceQuantTable);
prevDCCb = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCb,
ref pixelConverter.Cb,
- ref chrominanceQuantTable,
- ref unzig);
+ ref chrominanceQuantTable);
prevDCCr = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCr,
ref pixelConverter.Cr,
- ref chrominanceQuantTable,
- ref unzig);
+ ref chrominanceQuantTable);
if (this.IsFlushNeeded)
{
@@ -197,8 +193,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
- var unzig = ZigZag.CreateUnzigTable();
-
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
ImageFrame frame = pixels.Frames.RootFrame;
@@ -222,30 +216,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.YLeft,
- ref luminanceQuantTable,
- ref unzig);
+ ref luminanceQuantTable);
prevDCY = this.WriteBlock(
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.YRight,
- ref luminanceQuantTable,
- ref unzig);
+ ref luminanceQuantTable);
}
prevDCCb = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCb,
ref pixelConverter.Cb,
- ref chrominanceQuantTable,
- ref unzig);
+ ref chrominanceQuantTable);
prevDCCr = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCr,
ref pixelConverter.Cr,
- ref chrominanceQuantTable,
- ref unzig);
+ ref chrominanceQuantTable);
if (this.IsFlushNeeded)
{
@@ -269,8 +259,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
- var unzig = ZigZag.CreateUnzigTable();
-
// ReSharper disable once InconsistentNaming
int prevDCY = 0;
@@ -292,8 +280,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.Y,
- ref luminanceQuantTable,
- ref unzig);
+ ref luminanceQuantTable);
if (this.IsFlushNeeded)
{
@@ -320,28 +307,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex index,
int prevDC,
ref Block8x8F src,
- ref Block8x8F quant,
- ref ZigZag unZig)
+ ref Block8x8F quant)
{
ref Block8x8F refTemp1 = ref this.temporalBlock1;
ref Block8x8F refTemp2 = ref this.temporalBlock2;
+ ref Block8x8 spectralBlock = ref this.temporalShortBlock;
FastFloatingPointDCT.TransformFDCT(ref src, ref refTemp1, ref refTemp2);
- Block8x8F.Quantize(ref refTemp1, ref refTemp2, ref quant, ref unZig);
+ Block8x8F.Quantize(ref refTemp1, ref spectralBlock, ref quant);
// Emit the DC delta.
- int dc = (int)refTemp2[0];
+ int dc = spectralBlock[0];
this.EmitHuffRLE(this.huffmanTables[2 * (int)index].Values, 0, dc - prevDC);
// Emit the AC components.
int[] acHuffTable = this.huffmanTables[(2 * (int)index) + 1].Values;
int runLength = 0;
- int lastValuableIndex = refTemp2.GetLastNonZeroIndex();
+ int lastValuableIndex = spectralBlock.GetLastNonZeroIndex();
for (int zig = 1; zig <= lastValuableIndex; zig++)
{
- int ac = (int)refTemp2[zig];
+ int ac = spectralBlock[zig];
if (ac == 0)
{
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Quantization.cs b/src/ImageSharp/Formats/Jpeg/Components/Quantization.cs
index 2ff56c63b9..eab5e6a082 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Quantization.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Quantization.cs
@@ -39,53 +39,59 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public const int QualityEstimationConfidenceUpperThreshold = 98;
///
- /// Gets the unscaled luminance quantization table in zig-zag order. Each
- /// encoder copies and scales the tables according to its quality parameter.
- /// The values are derived from ITU section K.1 after converting from natural to
- /// zig-zag order.
+ /// Gets unscaled luminance quantization table.
///
+ ///
+ /// The values are derived from ITU section K.1.
+ ///
// The C# compiler emits this as a compile-time constant embedded in the PE file.
// This is effectively compiled down to: return new ReadOnlySpan(&data, length)
// More details can be found: https://github.com/dotnet/roslyn/pull/24621
- public static ReadOnlySpan UnscaledQuant_Luminance => new byte[]
+ public static ReadOnlySpan LuminanceTable => new byte[]
{
- 16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24,
- 40, 26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60,
- 57, 51, 56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80,
- 109, 81, 87, 95, 98, 103, 104, 103, 62, 77, 113, 121, 112,
- 100, 120, 92, 101, 103, 99,
+ 16, 11, 10, 16, 24, 40, 51, 61,
+ 12, 12, 14, 19, 26, 58, 60, 55,
+ 14, 13, 16, 24, 40, 57, 69, 56,
+ 14, 17, 22, 29, 51, 87, 80, 62,
+ 18, 22, 37, 56, 68, 109, 103, 77,
+ 24, 35, 55, 64, 81, 104, 113, 92,
+ 49, 64, 78, 87, 103, 121, 120, 101,
+ 72, 92, 95, 98, 112, 100, 103, 99,
};
///
- /// Gets the unscaled chrominance quantization table in zig-zag order. Each
- /// encoder copies and scales the tables according to its quality parameter.
- /// The values are derived from ITU section K.1 after converting from natural to
- /// zig-zag order.
+ /// Gets unscaled chrominance quantization table.
///
+ ///
+ /// The values are derived from ITU section K.1.
+ ///
// The C# compiler emits this as a compile-time constant embedded in the PE file.
// This is effectively compiled down to: return new ReadOnlySpan(&data, length)
// More details can be found: https://github.com/dotnet/roslyn/pull/24621
- public static ReadOnlySpan UnscaledQuant_Chrominance => new byte[]
+ public static ReadOnlySpan ChrominanceTable => new byte[]
{
- 17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66,
- 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+ 17, 18, 24, 47, 99, 99, 99, 99,
+ 18, 21, 26, 66, 99, 99, 99, 99,
+ 24, 26, 56, 99, 99, 99, 99, 99,
+ 47, 66, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
};
/// Ported from JPEGsnoop:
/// https://github.com/ImpulseAdventure/JPEGsnoop/blob/9732ee0961f100eb69bbff4a0c47438d5997abee/source/JfifDecode.cpp#L4570-L4694
///
- /// Estimates jpeg quality based on quantization table in zig-zag order.
+ /// Estimates jpeg quality based on standard quantization table.
///
///
- /// This technically can be used with any given table but internal decoder code uses ITU spec tables:
- /// and .
+ /// Technically, this can be used with any given table but internal decoder code uses ITU spec tables:
+ /// <see cref="LuminanceTable"/> and <see cref="ChrominanceTable"/>.
///
/// Input quantization table.
- /// Quantization to estimate against.
- /// Estimated quality
+ /// Natural order quantization table to estimate against.
+ /// Estimated quality.
public static int EstimateQuality(ref Block8x8F table, ReadOnlySpan target)
{
// This method can be SIMD'ified if standard table is injected as Block8x8F.
@@ -106,11 +112,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
int quality;
for (int i = 0; i < Block8x8F.Size; i++)
{
- float coeff = table[i];
- int coeffInteger = (int)coeff;
+ int coeff = (int)table[i];
// Coefficients are actually int16 casted to float numbers so there's no truncating error.
- if (coeffInteger != 0)
+ if (coeff != 0)
{
comparePercent = 100.0 * (table[i] / target[i]);
}
@@ -152,7 +157,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// Estimated quality
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int EstimateLuminanceQuality(ref Block8x8F luminanceTable)
- => EstimateQuality(ref luminanceTable, UnscaledQuant_Luminance);
+ => EstimateQuality(ref luminanceTable, LuminanceTable);
///
- /// Estimates jpeg quality based on quantization table in zig-zag order.
+ /// Estimates jpeg quality based on quantization table in natural order.
@@ -161,7 +166,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// Estimated quality
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int EstimateChrominanceQuality(ref Block8x8F chrominanceTable)
- => EstimateQuality(ref chrominanceTable, UnscaledQuant_Chrominance);
+ => EstimateQuality(ref chrominanceTable, ChrominanceTable);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int QualityToScale(int quality)
@@ -185,10 +190,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Block8x8F ScaleLuminanceTable(int quality)
- => ScaleQuantizationTable(scale: QualityToScale(quality), UnscaledQuant_Luminance);
+ => ScaleQuantizationTable(scale: QualityToScale(quality), LuminanceTable);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Block8x8F ScaleChrominanceTable(int quality)
- => ScaleQuantizationTable(scale: QualityToScale(quality), UnscaledQuant_Chrominance);
+ => ScaleQuantizationTable(scale: QualityToScale(quality), ChrominanceTable);
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs
new file mode 100644
index 0000000000..066eb28469
--- /dev/null
+++ b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs
@@ -0,0 +1,404 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components
+{
+ internal static partial class ZigZag
+ {
+ ///
+ /// Special byte value to zero out elements during Sse/Avx shuffle intrinsics.
+ ///
+ private const byte Z = 0xff;
+
+ ///
+ /// Gets shuffle vectors for the <see cref="ApplyZigZagOrderingSse"/>
+ /// zig zag implementation.
+ ///
+ private static ReadOnlySpan SseShuffleMasks => new byte[]
+ {
+ // 0_A
+ 0, 1, 2, 3, Z, Z, Z, Z, Z, Z, 4, 5, 6, 7, Z, Z,
+ // 0_B
+ Z, Z, Z, Z, 0, 1, Z, Z, 2, 3, Z, Z, Z, Z, 4, 5,
+ // 0_C
+ Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, Z, Z, Z, Z, Z, Z,
+
+ // 1_A
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 8, 9, 10, 11,
+ // 1_B
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7, Z, Z, Z, Z,
+ // 1_C
+ 2, 3, Z, Z, Z, Z, Z, Z, 4, 5, Z, Z, Z, Z, Z, Z,
+ // 1_D
+ Z, Z, 0, 1, Z, Z, 2, 3, Z, Z, Z, Z, Z, Z, Z, Z,
+ // 1_E
+ Z, Z, Z, Z, 0, 1, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+
+ // 2_B
+ 8, 9, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+ // 2_C
+ Z, Z, 6, 7, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+ // 2_D
+ Z, Z, Z, Z, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+ // 2_E
+ Z, Z, Z, Z, Z, Z, 2, 3, Z, Z, Z, Z, Z, Z, 4, 5,
+ // 2_F
+ Z, Z, Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, 2, 3, Z, Z,
+ // 2_G
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, Z, Z,
+
+ // 3_A
+ Z, Z, Z, Z, Z, Z, 12, 13, 14, 15, Z, Z, Z, Z, Z, Z,
+ // 3_B
+ Z, Z, Z, Z, 10, 11, Z, Z, Z, Z, 12, 13, Z, Z, Z, Z,
+ // 3_C
+ Z, Z, 8, 9, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, Z, Z,
+ // 3_D/4_E
+ 6, 7, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 8, 9,
+
+ // 4_F
+ Z, Z, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7, Z, Z,
+ // 4_G
+ Z, Z, Z, Z, 2, 3, Z, Z, Z, Z, 4, 5, Z, Z, Z, Z,
+ // 4_H
+ Z, Z, Z, Z, Z, Z, 0, 1, 2, 3, Z, Z, Z, Z, Z, Z,
+
+ // 5_B
+ Z, Z, Z, Z, 14, 15, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+ // 5_C
+ Z, Z, 12, 13, Z, Z, 14, 15, Z, Z, Z, Z, Z, Z, Z, Z,
+ // 5_D
+ 10, 11, Z, Z, Z, Z, Z, Z, 12, 13, Z, Z, Z, Z, Z, Z,
+ // 5_E
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, Z, Z, Z, Z,
+ // 5_F
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 8, 9, Z, Z,
+ // 5_G
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7,
+
+ // 6_D
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 14, 15, Z, Z, Z, Z,
+ // 6_E
+ Z, Z, Z, Z, Z, Z, Z, Z, 12, 13, Z, Z, 14, 15, Z, Z,
+ // 6_F
+ Z, Z, Z, Z, Z, Z, 10, 11, Z, Z, Z, Z, Z, Z, 12, 13,
+ // 6_G
+ Z, Z, Z, Z, 8, 9, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+ // 6_H
+ 4, 5, 6, 7, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+
+ // 7_F
+ Z, Z, Z, Z, Z, Z, Z, Z, 14, 15, Z, Z, Z, Z, Z, Z,
+ // 7_G
+ 10, 11, Z, Z, Z, Z, 12, 13, Z, Z, 14, 15, Z, Z, Z, Z,
+ // 7_H
+ Z, Z, 8, 9, 10, 11, Z, Z, Z, Z, Z, Z, 12, 13, 14, 15
+ };
+
+ ///
+ /// Gets shuffle vectors for the <see cref="ApplyZigZagOrderingAvx"/>
+ /// zig zag implementation.
+ ///
+ private static ReadOnlySpan AvxShuffleMasks => new byte[]
+ {
+ // 01_AB/01_EF/23_CD - cross-lane
+ 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0,
+
+ // 01_AB - inner-lane
+ 0, 1, 2, 3, 8, 9, Z, Z, 10, 11, 4, 5, 6, 7, 12, 13, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, 4, 5, 6, 7,
+
+ // 01_CD/23_GH - cross-lane
+ 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, Z, Z, Z, Z, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, Z, Z, Z, Z,
+
+ // 01_CD - inner-lane
+ Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, Z, Z, Z, Z, Z, Z, 2, 3, 8, 9, Z, Z, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z,
+
+ // 01_EF - inner-lane
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+
+ // 23_AB/45_CD/67_EF - cross-lane
+ 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, Z, Z, Z, Z, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, Z, Z, Z, Z,
+
+ // 23_AB - inner-lane
+ 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7, 0, 1, 2, 3, 8, 9, Z, Z, Z, Z,
+
+ // 23_CD - inner-lane
+ Z, Z, 6, 7, 12, 13, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7, 12, 13,
+
+ // 23_EF - inner-lane
+ Z, Z, Z, Z, Z, Z, 2, 3, 8, 9, Z, Z, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+
+ // 23_GH - inner-lane
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 0, 1, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+
+ // 45_AB - inner-lane
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+
+ // 45_CD - inner-lane
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7, 0, 1, Z, Z, 2, 3, 8, 9, Z, Z, Z, Z, Z, Z,
+
+ // 45_EF - cross-lane
+ 1, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, Z, Z, Z, Z, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0,
+
+ // 45_EF - inner-lane
+ 2, 3, 8, 9, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 2, 3, 8, 9, Z, Z,
+
+ // 45_GH - inner-lane
+ Z, Z, Z, Z, 2, 3, 8, 9, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 6, 7,
+
+ // 67_CD - inner-lane
+ Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z,
+
+ // 67_EF - inner-lane
+ Z, Z, Z, Z, Z, Z, 6, 7, 0, 1, Z, Z, 2, 3, 8, 9, Z, Z, Z, Z, Z, Z, Z, Z, 10, 11, Z, Z, Z, Z, Z, Z,
+
+ // 67_GH - inner-lane
+ 8, 9, 10, 11, 4, 5, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 2, 3, 8, 9, 10, 11, 4, 5, Z, Z, 6, 7, 12, 13, 14, 15
+ };
+
+ ///
+ /// Applies zig zag ordering for given 8x8 matrix using SSE cpu intrinsics.
+ ///
+ ///
+ /// Requires Ssse3 support.
+ ///
+ /// Input matrix.
+ /// Matrix to store the result. Can be a reference to input matrix.
+ public static unsafe void ApplyZigZagOrderingSse(ref Block8x8 source, ref Block8x8 dest)
+ {
+ DebugGuard.IsTrue(Ssse3.IsSupported, "Ssse3 support is required to run this operation!");
+
+ fixed (byte* maskPtr = SseShuffleMasks)
+ {
+ Vector128 A = source.V0.AsByte();
+ Vector128 B = source.V1.AsByte();
+ Vector128 C = source.V2.AsByte();
+ Vector128 D = source.V3.AsByte();
+ Vector128 E = source.V4.AsByte();
+ Vector128 F = source.V5.AsByte();
+ Vector128 G = source.V6.AsByte();
+ Vector128 H = source.V7.AsByte();
+
+ // row0
+ Vector128 row0_A = Ssse3.Shuffle(A, Sse2.LoadVector128(maskPtr + (0 * 16))).AsInt16();
+ Vector128 row0_B = Ssse3.Shuffle(B, Sse2.LoadVector128(maskPtr + (1 * 16))).AsInt16();
+ Vector128 row0 = Sse2.Or(row0_A, row0_B);
+ Vector128 row0_C = Ssse3.Shuffle(C, Sse2.LoadVector128(maskPtr + (2 * 16))).AsInt16();
+ row0 = Sse2.Or(row0, row0_C);
+
+ // row1
+ Vector128 row1_A = Ssse3.Shuffle(A, Sse2.LoadVector128(maskPtr + (3 * 16))).AsInt16();
+ Vector128 row1_B = Ssse3.Shuffle(B, Sse2.LoadVector128(maskPtr + (4 * 16))).AsInt16();
+ Vector128 row1 = Sse2.Or(row1_A, row1_B);
+ Vector128 row1_C = Ssse3.Shuffle(C, Sse2.LoadVector128(maskPtr + (5 * 16))).AsInt16();
+ row1 = Sse2.Or(row1, row1_C);
+ Vector128 row1_D = Ssse3.Shuffle(D, Sse2.LoadVector128(maskPtr + (6 * 16))).AsInt16();
+ row1 = Sse2.Or(row1, row1_D);
+ Vector128 row1_E = Ssse3.Shuffle(E, Sse2.LoadVector128(maskPtr + (7 * 16))).AsInt16();
+ row1 = Sse2.Or(row1, row1_E);
+
+ // row2
+ Vector128 row2_B = Ssse3.Shuffle(B, Sse2.LoadVector128(maskPtr + (8 * 16))).AsInt16();
+ Vector128 row2_C = Ssse3.Shuffle(C, Sse2.LoadVector128(maskPtr + (9 * 16))).AsInt16();
+ Vector128 row2 = Sse2.Or(row2_B, row2_C);
+ Vector128 row2_D = Ssse3.Shuffle(D, Sse2.LoadVector128(maskPtr + (10 * 16))).AsInt16();
+ row2 = Sse2.Or(row2, row2_D);
+ Vector128 row2_E = Ssse3.Shuffle(E, Sse2.LoadVector128(maskPtr + (11 * 16))).AsInt16();
+ row2 = Sse2.Or(row2, row2_E);
+ Vector128 row2_F = Ssse3.Shuffle(F, Sse2.LoadVector128(maskPtr + (12 * 16))).AsInt16();
+ row2 = Sse2.Or(row2, row2_F);
+ Vector128 row2_G = Ssse3.Shuffle(G, Sse2.LoadVector128(maskPtr + (13 * 16))).AsInt16();
+ row2 = Sse2.Or(row2, row2_G);
+
+ // row3
+ Vector128 A_3 = Ssse3.Shuffle(A, Sse2.LoadVector128(maskPtr + (14 * 16))).AsInt16().AsInt16();
+ Vector128 B_3 = Ssse3.Shuffle(B, Sse2.LoadVector128(maskPtr + (15 * 16))).AsInt16().AsInt16();
+ Vector128 row3 = Sse2.Or(A_3, B_3);
+ Vector128 C_3 = Ssse3.Shuffle(C, Sse2.LoadVector128(maskPtr + (16 * 16))).AsInt16();
+ row3 = Sse2.Or(row3, C_3);
+ Vector128 D3_E4_shuffleMask = Sse2.LoadVector128(maskPtr + (17 * 16));
+ Vector128 D_3 = Ssse3.Shuffle(D, D3_E4_shuffleMask).AsInt16();
+ row3 = Sse2.Or(row3, D_3);
+
+ // row4
+ Vector128 E_4 = Ssse3.Shuffle(E, D3_E4_shuffleMask).AsInt16();
+ Vector128 F_4 = Ssse3.Shuffle(F, Sse2.LoadVector128(maskPtr + (18 * 16))).AsInt16();
+ Vector128 row4 = Sse2.Or(E_4, F_4);
+ Vector128 G_4 = Ssse3.Shuffle(G, Sse2.LoadVector128(maskPtr + (19 * 16))).AsInt16();
+ row4 = Sse2.Or(row4, G_4);
+ Vector128 H_4 = Ssse3.Shuffle(H, Sse2.LoadVector128(maskPtr + (20 * 16))).AsInt16();
+ row4 = Sse2.Or(row4, H_4);
+
+ // row5
+ Vector128 B_5 = Ssse3.Shuffle(B, Sse2.LoadVector128(maskPtr + (21 * 16))).AsInt16();
+ Vector128 C_5 = Ssse3.Shuffle(C, Sse2.LoadVector128(maskPtr + (22 * 16))).AsInt16();
+ Vector128 row5 = Sse2.Or(B_5, C_5);
+ Vector128 D_5 = Ssse3.Shuffle(D, Sse2.LoadVector128(maskPtr + (23 * 16))).AsInt16();
+ row5 = Sse2.Or(row5, D_5);
+ Vector128 E_5 = Ssse3.Shuffle(E, Sse2.LoadVector128(maskPtr + (24 * 16))).AsInt16();
+ row5 = Sse2.Or(row5, E_5);
+ Vector128 F_5 = Ssse3.Shuffle(F, Sse2.LoadVector128(maskPtr + (25 * 16))).AsInt16();
+ row5 = Sse2.Or(row5, F_5);
+ Vector128 G_5 = Ssse3.Shuffle(G, Sse2.LoadVector128(maskPtr + (26 * 16))).AsInt16();
+ row5 = Sse2.Or(row5, G_5);
+
+ // row6
+ Vector128 D_6 = Ssse3.Shuffle(D, Sse2.LoadVector128(maskPtr + (27 * 16))).AsInt16();
+ Vector128 E_6 = Ssse3.Shuffle(E, Sse2.LoadVector128(maskPtr + (28 * 16))).AsInt16();
+ Vector128 row6 = Sse2.Or(D_6, E_6);
+ Vector128 F_6 = Ssse3.Shuffle(F, Sse2.LoadVector128(maskPtr + (29 * 16))).AsInt16();
+ row6 = Sse2.Or(row6, F_6);
+ Vector128 G_6 = Ssse3.Shuffle(G, Sse2.LoadVector128(maskPtr + (30 * 16))).AsInt16();
+ row6 = Sse2.Or(row6, G_6);
+ Vector128 H_6 = Ssse3.Shuffle(H, Sse2.LoadVector128(maskPtr + (31 * 16))).AsInt16();
+ row6 = Sse2.Or(row6, H_6);
+
+ // row7
+ Vector128 F_7 = Ssse3.Shuffle(F, Sse2.LoadVector128(maskPtr + (32 * 16))).AsInt16();
+ Vector128 G_7 = Ssse3.Shuffle(G, Sse2.LoadVector128(maskPtr + (33 * 16))).AsInt16();
+ Vector128 row7 = Sse2.Or(F_7, G_7);
+ Vector128 H_7 = Ssse3.Shuffle(H, Sse2.LoadVector128(maskPtr + (35 * 16))).AsInt16();
+ row7 = Sse2.Or(row7, H_7);
+
+ dest.V0 = row0;
+ dest.V1 = row1;
+ dest.V2 = row2;
+ dest.V3 = row3;
+ dest.V4 = row4;
+ dest.V5 = row5;
+ dest.V6 = row6;
+ dest.V7 = row7;
+ }
+ }
+
+ /// <summary>
+ /// Applies zig-zag ordering to the given 8x8 matrix using AVX2 cpu intrinsics.
+ /// </summary>
+ /// <remarks>
+ /// Requires Avx2 support.
+ /// </remarks>
+ /// <param name="source">Input matrix.</param>
+ /// <param name="dest">Matrix to store the result. Can be a reference to input matrix.</param>
+ public static unsafe void ApplyZigZagOrderingAvx(ref Block8x8 source, ref Block8x8 dest)
+ {
+ DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");
+
+ fixed (byte* shuffleVectorsPtr = AvxShuffleMasks)
+ {
+ // Operation count: 18 shuffle mask loads
+ // 10 cross-lane shuffles (permutations)
+ // 14 byte shuffles
+ // 10 bitwise or's
+ // 4 stores
+
+ // Input layout, two 8-element rows per 256-bit vector (letter = row, digit = column):
+ // A0 A1 A2 A3 A4 A5 A6 A7 | B0 B1 B2 B3 B4 B5 B6 B7
+ // C0 C1 C2 C3 C4 C5 C6 C7 | D0 D1 D2 D3 D4 D5 D6 D7
+ // E0 E1 E2 E3 E4 E5 E6 E7 | F0 F1 F2 F3 F4 F5 F6 F7
+ // G0 G1 G2 G3 G4 G5 G6 G7 | H0 H1 H2 H3 H4 H5 H6 H7
+ Vector256 AB = source.V01.AsByte();
+ Vector256 CD = source.V23.AsByte();
+ Vector256 EF = source.V45.AsByte();
+ Vector256 GH = source.V67.AsByte();
+
+ // row01 (target output) - A0 A1 B0 C0 B1 A2 A3 B2 | C1 D0 E0 D1 C2 B3 A4 A5
+ Vector256 AB01_EF01_CD23_cr_ln_shfmask = Avx.LoadVector256(shuffleVectorsPtr + (0 * 32)).AsInt32();
+
+ // row01_AB (after permute) - (A0 A1) (B0 B1) (A2 A3) (B2 B3) | (B2 B3) (A4 A5) (X X) (X X)
+ Vector256 row01_AB = Avx2.PermuteVar8x32(AB.AsInt32(), AB01_EF01_CD23_cr_ln_shfmask).AsByte();
+ // row01_AB (after shuffle) - (A0 A1) (B0 X) (B1 A2) (A3 B2) | (X X) (X X) (X B3) (A4 A5)
+ row01_AB = Avx2.Shuffle(row01_AB, Avx.LoadVector256(shuffleVectorsPtr + (1 * 32))).AsByte();
+
+ Vector256 CD01_GH23_cr_ln_shfmask = Avx.LoadVector256(shuffleVectorsPtr + (2 * 32)).AsInt32();
+
+ // row01_CD (after permute) - (C0 C1) (X X) (X X) (X X) | (C0 C1) (D0 D1) (C2 C3) (X X)
+ Vector256 row01_CD = Avx2.PermuteVar8x32(CD.AsInt32(), CD01_GH23_cr_ln_shfmask).AsByte();
+ // row01_CD (after shuffle) - (X X) (X C0) (X X) (X X) | (C1 D0) (X D1) (C2 X) (X X)
+ row01_CD = Avx2.Shuffle(row01_CD, Avx.LoadVector256(shuffleVectorsPtr + (3 * 32))).AsByte();
+
+ // row0123_EF (permuted once, reused for row01 and row23) - (E0 E1) (E2 E3) (F0 F1) (X X) | (E0 E1) (X X) (X X) (X X)
+ Vector256 row0123_EF = Avx2.PermuteVar8x32(EF.AsInt32(), AB01_EF01_CD23_cr_ln_shfmask).AsByte();
+ // row01_EF - (X X) (X X) (X X) (X X) | (X X) (E0 X) (X X) (X X)
+ Vector256 row01_EF = Avx2.Shuffle(row0123_EF, Avx.LoadVector256(shuffleVectorsPtr + (4 * 32))).AsByte();
+
+ Vector256 row01 = Avx2.Or(Avx2.Or(row01_AB, row01_CD), row01_EF);
+
+
+ // row23 (target output) - B4 C3 D2 E1 F0 G0 F1 E2 | D3 C4 B5 A6 A7 B6 C5 D4
+
+ Vector256 AB23_CD45_EF67_cr_ln_shfmask = Avx.LoadVector256(shuffleVectorsPtr + (5 * 32)).AsInt32();
+
+ // row2345_AB (permuted once, reused for row23 and row45) - (B4 B5) (X X) (X X) (X X) | (B4 B5) (B6 B7) (A6 A7) (X X)
+ Vector256 row2345_AB = Avx2.PermuteVar8x32(AB.AsInt32(), AB23_CD45_EF67_cr_ln_shfmask).AsByte();
+ // row23_AB - (B4 X) (X X) (X X) (X X) | (X X) (B5 A6) (A7 B6) (X X)
+ Vector256 row23_AB = Avx2.Shuffle(row2345_AB, Avx.LoadVector256(shuffleVectorsPtr + (6 * 32))).AsByte();
+
+ // row23_CD (after permute) - (C2 C3) (D2 D3) (X X) (X X) | (D2 D3) (C4 C5) (D4 D5) (X X)
+ Vector256 row23_CD = Avx2.PermuteVar8x32(CD.AsInt32(), AB01_EF01_CD23_cr_ln_shfmask).AsByte();
+ // row23_CD (after shuffle) - (X C3) (D2 X) (X X) (X X) | (D3 C4) (X X) (X X) (C5 D4)
+ row23_CD = Avx2.Shuffle(row23_CD, Avx.LoadVector256(shuffleVectorsPtr + (7 * 32))).AsByte();
+
+ // row23_EF - (X X) (X E1) (F0 X) (F1 E2) | (X X) (X X) (X X) (X X)
+ Vector256 row23_EF = Avx2.Shuffle(row0123_EF, Avx.LoadVector256(shuffleVectorsPtr + (8 * 32))).AsByte();
+
+ // row2345_GH (permuted once, reused for row23 and row45) - (G0 G1) (G2 G3) (H0 H1) (X X) | (G2 G3) (X X) (X X) (X X)
+ Vector256 row2345_GH = Avx2.PermuteVar8x32(GH.AsInt32(), CD01_GH23_cr_ln_shfmask).AsByte();
+ // row23_GH - (X X) (X X) (X G0) (X X) | (X X) (X X) (X X) (X X)
+ Vector256 row23_GH = Avx2.Shuffle(row2345_GH, Avx.LoadVector256(shuffleVectorsPtr + (9 * 32)).AsByte());
+
+ Vector256 row23 = Avx2.Or(Avx2.Or(row23_AB, row23_CD), Avx2.Or(row23_EF, row23_GH));
+
+
+ // row45 (target output) - E3 F2 G1 H0 H1 G2 F3 E4 | D5 C6 B7 C7 D6 E5 F4 G3
+
+ // row45_AB - (X X) (X X) (X X) (X X) | (X X) (B7 X) (X X) (X X)
+ Vector256 row45_AB = Avx2.Shuffle(row2345_AB, Avx.LoadVector256(shuffleVectorsPtr + (10 * 32)).AsByte());
+
+ // row4567_CD (permuted once, reused for row45 and row67) - (D6 D7) (X X) (X X) (X X) | (C6 C7) (D4 D5) (D6 D7) (X X)
+ Vector256 row4567_CD = Avx2.PermuteVar8x32(CD.AsInt32(), AB23_CD45_EF67_cr_ln_shfmask).AsByte();
+ // row45_CD - (X X) (X X) (X X) (X X) | (D5 C6) (X C7) (D6 X) (X X)
+ Vector256 row45_CD = Avx2.Shuffle(row4567_CD, Avx.LoadVector256(shuffleVectorsPtr + (11 * 32)).AsByte());
+
+ Vector256 EF45_GH67_cr_ln_shfmask = Avx.LoadVector256(shuffleVectorsPtr + (12 * 32)).AsInt32();
+
+ // row45_EF (after permute) - (E2 E3) (E4 E5) (F2 F3) (X X) | (E4 E5) (F4 F5) (X X) (X X)
+ Vector256 row45_EF = Avx2.PermuteVar8x32(EF.AsInt32(), EF45_GH67_cr_ln_shfmask).AsByte();
+ // row45_EF (after shuffle) - (E3 F2) (X X) (X X) (F3 E4) | (X X) (X X) (X E5) (F4 X)
+ row45_EF = Avx2.Shuffle(row45_EF, Avx.LoadVector256(shuffleVectorsPtr + (13 * 32)).AsByte());
+
+ // row45_GH - (X X) (G1 H0) (H1 G2) (X X) | (X X) (X X) (X X) (X G3)
+ Vector256 row45_GH = Avx2.Shuffle(row2345_GH, Avx.LoadVector256(shuffleVectorsPtr + (14 * 32)).AsByte());
+
+ Vector256 row45 = Avx2.Or(Avx2.Or(row45_AB, row45_CD), Avx2.Or(row45_EF, row45_GH));
+
+
+ // row67 (target output) - H2 H3 G4 F5 E6 D7 E7 F6 | G5 H4 H5 G6 F7 G7 H6 H7
+
+ // row67_CD - (X X) (X X) (X D7) (X X) | (X X) (X X) (X X) (X X)
+ Vector256 row67_CD = Avx2.Shuffle(row4567_CD, Avx.LoadVector256(shuffleVectorsPtr + (15 * 32)).AsByte());
+
+ // row67_EF (after permute) - (E6 E7) (F4 F5) (F6 F7) (X X) | (F6 F7) (X X) (X X) (X X)
+ Vector256 row67_EF = Avx2.PermuteVar8x32(EF.AsInt32(), AB23_CD45_EF67_cr_ln_shfmask).AsByte();
+ // row67_EF (after shuffle) - (X X) (X F5) (E6 X) (E7 F6) | (X X) (X X) (F7 X) (X X)
+ row67_EF = Avx2.Shuffle(row67_EF, Avx.LoadVector256(shuffleVectorsPtr + (16 * 32)).AsByte());
+
+ // row67_GH (after permute) - (G4 G5) (H2 H3) (X X) (X X) | (G4 G5) (G6 G7) (H4 H5) (H6 H7)
+ Vector256 row67_GH = Avx2.PermuteVar8x32(GH.AsInt32(), EF45_GH67_cr_ln_shfmask).AsByte();
+ // row67_GH (after shuffle) - (H2 H3) (G4 X) (X X) (X X) | (G5 H4) (H5 G6) (X G7) (H6 H7)
+ row67_GH = Avx2.Shuffle(row67_GH, Avx.LoadVector256(shuffleVectorsPtr + (17 * 32)).AsByte());
+
+ Vector256 row67 = Avx2.Or(Avx2.Or(row67_CD, row67_EF), row67_GH);
+
+ dest.V01 = row01.AsInt16();
+ dest.V23 = row23.AsInt16();
+ dest.V45 = row45.AsInt16();
+ dest.V67 = row67.AsInt16();
+ }
+ }
+ }
+}
+#endif
diff --git a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs
index 737652d4e6..c2b0fc5d0f 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs
@@ -4,19 +4,17 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
- ///
- /// Holds the Jpeg UnZig array in a value/stack type.
- /// Unzig maps from the zigzag ordering to the natural ordering. For example,
- /// unzig[3] is the column and row of the fourth element in zigzag order. The
- /// value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
- ///
- [StructLayout(LayoutKind.Sequential)]
- internal unsafe struct ZigZag
+ internal static partial class ZigZag
{
///
+ /// Gets a span of zig-zag ordering indices.
+ ///
+ ///
/// When reading corrupted data, the Huffman decoders could attempt
/// to reference an entry beyond the end of this array (if the decoded
/// zero run length reaches past the end of the block). To prevent
@@ -25,20 +23,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// to be stored in location 63 of the block, not somewhere random.
/// The worst case would be a run-length of 15, which means we need 16
/// fake entries.
- ///
- private const int Size = 64 + 16;
-
- ///
- /// Copy of in a value type
- ///
- public fixed byte Data[Size];
-
- ///
- /// Gets the unzigs map, which maps from the zigzag ordering to the natural ordering.
- /// For example, unzig[3] is the column and row of the fourth element in zigzag order.
- /// The value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
- ///
- private static ReadOnlySpan Unzig => new byte[]
+ ///
+ public static ReadOnlySpan ZigZagOrder => new byte[]
{
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
@@ -48,53 +34,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63,
- 63, 63, 63, 63, 63, 63, 63, 63, // Extra entries for safety in decoder
+
+ // Extra entries for safety in decoder
+ 63, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63
};
-
- ///
- /// Returns the value at the given index
- ///
- /// The index
- /// The
- public byte this[int idx]
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get
- {
- ref byte self = ref Unsafe.As(ref this);
- return Unsafe.Add(ref self, idx);
- }
- }
-
- ///
- /// Creates and fills an instance of with Jpeg unzig indices
- ///
- /// The new instance
- public static ZigZag CreateUnzigTable()
- {
- ZigZag result = default;
- ref byte sourceRef = ref MemoryMarshal.GetReference(Unzig);
- ref byte destinationRef = ref Unsafe.AsRef(result.Data);
-
- Unzig.CopyTo(new Span(result.Data, Size));
-
- return result;
- }
-
- ///
- /// Apply Zigging to the given quantization table, so it will be sufficient to multiply blocks for dequantizing them.
- ///
- public static Block8x8F CreateDequantizationTable(ref Block8x8F qt)
- {
- Block8x8F result = default;
-
- for (int i = 0; i < Block8x8F.Size; i++)
- {
- result[Unzig[i]] = qt[i];
- }
-
- return result;
- }
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs
index e94b07faae..4770542649 100644
--- a/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs
+++ b/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs
@@ -740,9 +740,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
stream.Read(this.temp, 0, 64);
remaining -= 64;
+ // Parsing quantization table & saving it in natural order
for (int j = 0; j < 64; j++)
{
- table[j] = this.temp[j];
+ table[ZigZag.ZigZagOrder[j]] = this.temp[j];
}
break;
@@ -760,9 +761,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
stream.Read(this.temp, 0, 128);
remaining -= 128;
+ // Parsing quantization table & saving it in natural order
for (int j = 0; j < 64; j++)
{
- table[j] = (this.temp[2 * j] << 8) | this.temp[(2 * j) + 1];
+ table[ZigZag.ZigZagOrder[j]] = (this.temp[2 * j] << 8) | this.temp[(2 * j) + 1];
}
break;
diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
index 8c6726e65e..85a2c6846e 100644
--- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
+++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
@@ -151,7 +151,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
dqt[offset++] = (byte)i;
for (int j = 0; j < Block8x8F.Size; j++)
{
- dqt[offset++] = (byte)quant[j];
+ dqt[offset++] = (byte)quant[ZigZag.ZigZagOrder[j]];
}
}
@@ -635,11 +635,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
/// Initializes quntization tables.
///
///
+ ///
+ /// Zig-zag ordering is NOT applied to the resulting tables.
+ ///
+ ///
/// We take quality values in a hierarchical order:
/// 1. Check if encoder has set quality
- /// 2. Check if metadata has special table for encoding
- /// 3. Check if metadata has set quality
- /// 4. Take default quality value - 75
+ /// 2. Check if metadata has set quality
+ /// 3. Take default quality value - 75
+ ///
///
/// Color components count.
/// Jpeg metadata instance.
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
index 42fdd603e9..fc642dcc79 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
@@ -272,32 +272,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
this.CompareBlocks(expected, actual, 0);
}
+ // TODO: intrinsic tests
[Theory]
- [InlineData(1)]
- [InlineData(2)]
- public unsafe void Quantize(int seed)
+ [InlineData(1, 2)]
+ [InlineData(2, 1)]
+ public void Quantize(int srcSeed, int qtSeed)
{
- var block = default(Block8x8F);
- block.LoadFrom(Create8x8RoundedRandomFloatData(-2000, 2000, seed));
-
- var qt = default(Block8x8F);
- qt.LoadFrom(Create8x8RoundedRandomFloatData(-2000, 2000, seed));
-
- var unzig = ZigZag.CreateUnzigTable();
+ Block8x8F source = CreateRandomFloatBlock(-2000, 2000, srcSeed);
+ Block8x8F quant = CreateRandomFloatBlock(-2000, 2000, qtSeed);
- int* expectedResults = stackalloc int[Block8x8F.Size];
- ReferenceImplementations.QuantizeRational(&block, expectedResults, &qt, unzig.Data);
+ Block8x8 expected = default;
+ ReferenceImplementations.Quantize(ref source, ref expected, ref quant, ZigZag.ZigZagOrder);
- var actualResults = default(Block8x8F);
+ Block8x8 actual = default;
+ Block8x8F.Quantize(ref source, ref actual, ref quant);
- Block8x8F.Quantize(ref block, ref actualResults, ref qt, ref unzig);
-
- for (int i = 0; i < Block8x8F.Size; i++)
+ for (int i = 0; i < Block8x8.Size; i++)
{
- int expected = expectedResults[i];
- int actual = (int)actualResults[i];
-
- Assert.Equal(expected, actual);
+ Assert.Equal(expected[i], actual[i]);
}
}
@@ -368,48 +360,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX);
}
- [Theory]
- [InlineData(1)]
- [InlineData(2)]
- [InlineData(3)]
- public unsafe void DequantizeBlock(int seed)
- {
- Block8x8F original = CreateRandomFloatBlock(-500, 500, seed);
- Block8x8F qt = CreateRandomFloatBlock(0, 10, seed + 42);
-
- var unzig = ZigZag.CreateUnzigTable();
-
- Block8x8F expected = original;
- Block8x8F actual = original;
-
- ReferenceImplementations.DequantizeBlock(&expected, &qt, unzig.Data);
- Block8x8F.DequantizeBlock(&actual, &qt, unzig.Data);
-
- this.CompareBlocks(expected, actual, 0);
- }
-
- [Theory]
- [InlineData(1)]
- [InlineData(2)]
- [InlineData(3)]
- public unsafe void ZigZag_CreateDequantizationTable_MultiplicationShouldQuantize(int seed)
- {
- Block8x8F original = CreateRandomFloatBlock(-500, 500, seed);
- Block8x8F qt = CreateRandomFloatBlock(0, 10, seed + 42);
-
- var unzig = ZigZag.CreateUnzigTable();
- Block8x8F zigQt = ZigZag.CreateDequantizationTable(ref qt);
-
- Block8x8F expected = original;
- Block8x8F actual = original;
-
- ReferenceImplementations.DequantizeBlock(&expected, &qt, unzig.Data);
-
- actual.MultiplyInPlace(ref zigQt);
-
- this.CompareBlocks(expected, actual, 0);
- }
-
[Fact]
public void AddToAllInPlace()
{
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/QuantizationTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/QuantizationTests.cs
index 03f7020c09..4505ef5386 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/QuantizationTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/QuantizationTests.cs
@@ -21,7 +21,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Block8x8F table = JpegQuantization.ScaleLuminanceTable(quality);
int estimatedQuality = JpegQuantization.EstimateLuminanceQuality(ref table);
- Assert.True(quality.Equals(estimatedQuality), $"Failed to estimate luminance quality for standard table at quality level {quality}");
+ Assert.True(
+ quality.Equals(estimatedQuality),
+ $"Failed to estimate luminance quality for standard table at quality level {quality}");
}
}
@@ -35,7 +37,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Block8x8F table = JpegQuantization.ScaleChrominanceTable(quality);
int estimatedQuality = JpegQuantization.EstimateChrominanceQuality(ref table);
- Assert.True(quality.Equals(estimatedQuality), $"Failed to estimate chrominance quality for standard table at quality level {quality}");
+ Assert.True(
+ quality.Equals(estimatedQuality),
+ $"Failed to estimate chrominance quality for standard table at quality level {quality}");
}
}
}
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs b/tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs
index 2c673f30ee..aa98a7379b 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs
@@ -15,18 +15,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
///
internal static partial class ReferenceImplementations
{
- public static unsafe void DequantizeBlock(Block8x8F* blockPtr, Block8x8F* qtPtr, byte* unzigPtr)
+ public static void DequantizeBlock(ref Block8x8F block, ref Block8x8F qt, ReadOnlySpan zigzag)
{
- float* b = (float*)blockPtr;
- float* qtp = (float*)qtPtr;
- for (int qtIndex = 0; qtIndex < Block8x8F.Size; qtIndex++)
+ for (int i = 0; i < Block8x8F.Size; i++)
{
- byte i = unzigPtr[qtIndex];
- float* unzigPos = b + i;
-
- float val = *unzigPos;
- val *= qtp[qtIndex];
- *unzigPos = val;
+ int zig = zigzag[i];
+ block[zig] *= qt[i];
}
}
@@ -101,42 +95,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
///
/// Reference implementation to test .
- /// Rounding is done used an integer-based algorithm defined in .
///
- /// The input block
- /// The destination block of integers
- /// The quantization table
- /// Pointer to
- public static unsafe void QuantizeRational(Block8x8F* src, int* dest, Block8x8F* qt, byte* unzigPtr)
+ /// The input block.
+ /// The destination block of 16bit integers.
+ /// The quantization table.
+ /// Zig-Zag index sequence span.
+ public static void Quantize(ref Block8x8F src, ref Block8x8 dest, ref Block8x8F qt, ReadOnlySpan zigzag)
{
- float* s = (float*)src;
- float* q = (float*)qt;
-
- for (int zig = 0; zig < Block8x8F.Size; zig++)
+ for (int i = 0; i < Block8x8F.Size; i++)
{
- int a = (int)s[unzigPtr[zig]];
- int b = (int)q[zig];
-
- int val = RationalRound(a, b);
- dest[zig] = val;
+ int zig = zigzag[i];
+ dest[i] = (short)Math.Round(src[zig] / qt[zig], MidpointRounding.AwayFromZero);
}
}
-
- ///
- /// Rounds a rational number defined as dividend/divisor into an integer.
- ///
- /// The dividend.
- /// The divisor.
- /// The rounded value.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static int RationalRound(int dividend, int divisor)
- {
- if (dividend >= 0)
- {
- return (dividend + (divisor >> 1)) / divisor;
- }
-
- return -((-dividend + (divisor >> 1)) / divisor);
- }
}
}
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs
index e03cf9958f..39046438a8 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs
@@ -13,8 +13,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
public void ZigZagCanHandleAllPossibleCoefficients()
{
// Mimic the behaviour of the huffman scan decoder using all possible byte values
- var block = new short[64];
- var zigzag = ZigZag.CreateUnzigTable();
+ short[] block = new short[64];
for (int h = 0; h < 255; h++)
{
@@ -27,7 +26,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
if (s != 0)
{
i += r;
- block[zigzag[i++]] = (short)s;
+ block[ZigZag.ZigZagOrder[i++]] = (short)s;
}
else
{