diff --git a/src/ImageSharp/Formats/Jpeg/Port/Components/IDCT.cs b/src/ImageSharp/Formats/Jpeg/Port/Components/IDCT.cs
index 65c2bbde60..064b3bea36 100644
--- a/src/ImageSharp/Formats/Jpeg/Port/Components/IDCT.cs
+++ b/src/ImageSharp/Formats/Jpeg/Port/Components/IDCT.cs
@@ -6,24 +6,38 @@
using ImageSharp.Memory;
///
- /// Performa the invers
+ /// Performs the inverse Discrete Cosine Transform on each frame component.
///
internal static class IDCT
{
- private const int DctCos1 = 4017; // cos(pi/16)
- private const int DctSin1 = 799; // sin(pi/16)
- private const int DctCos3 = 3406; // cos(3*pi/16)
- private const int DctSin3 = 2276; // sin(3*pi/16)
- private const int DctCos6 = 1567; // cos(6*pi/16)
- private const int DctSin6 = 3784; // sin(6*pi/16)
- private const int DctSqrt2 = 5793; // sqrt(2)
+ ///
+ /// Precomputed values scaled up by 14 bits
+ ///
+ public static readonly short[] Aanscales =
+ {
+ 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 22725, 31521, 29692, 26722, 22725, 17855,
+ 12299, 6270, 21407, 29692, 27969, 25172, 21407, 16819, 11585,
+ 5906, 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
+ 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 12873,
+ 17855, 16819, 15137, 12873, 10114, 6967, 3552, 8867, 12299,
+ 11585, 10426, 8867, 6967, 4799, 2446, 4520, 6270, 5906, 5315,
+ 4520, 3552, 2446, 1247
+ };
+
+ private const int DctCos1 = 4017; // cos(pi/16)
+ private const int DctSin1 = 799; // sin(pi/16)
+ private const int DctCos3 = 3406; // cos(3*pi/16)
+ private const int DctSin3 = 2276; // sin(3*pi/16)
+ private const int DctCos6 = 1567; // cos(6*pi/16)
+ private const int DctSin6 = 3784; // sin(6*pi/16)
+ private const int DctSqrt2 = 5793; // sqrt(2)
private const int DctSqrt1D2 = 2896; // sqrt(2) / 2
#pragma warning disable SA1310 // Field names must not contain underscore
- private const int FIX_1_082392200 = 277; /* FIX(1.082392200) */
- private const int FIX_1_414213562 = 362; /* FIX(1.414213562) */
- private const int FIX_1_847759065 = 473; /* FIX(1.847759065) */
- private const int FIX_2_613125930 = 669; /* FIX(2.613125930) */
+ private const int FIX_1_082392200 = 277; // FIX(1.082392200)
+ private const int FIX_1_414213562 = 362; // FIX(1.414213562)
+ private const int FIX_1_847759065 = 473; // FIX(1.847759065)
+ private const int FIX_2_613125930 = 669; // FIX(2.613125930)
#pragma warning restore SA1310 // Field names must not contain underscore
private const int ConstBits = 8;
@@ -42,21 +56,9 @@
// be quite far out of range if the input data is corrupt, so a bulletproof
// range-limiting step is required. We use a mask-and-table-lookup method
// to do the combined operations quickly, assuming that MaxJSample+1
- // is a power of 2. See the comments with prepare_range_limit_table for more info.
+ // is a power of 2.
private const int RangeMask = (MaxJSample * 4) + 3; // 2 bits wider than legal samples
- // Precomputed values scaled up by 14 bits
- private static readonly short[] Aanscales =
- {
- 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 22725, 31521, 29692, 26722, 22725, 17855,
- 12299, 6270, 21407, 29692, 27969, 25172, 21407, 16819, 11585,
- 5906, 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
- 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 12873,
- 17855, 16819, 15137, 12873, 10114, 6967, 3552, 8867, 12299,
- 11585, 10426, 8867, 6967, 4799, 2446, 4520, 6270, 5906, 5315,
- 4520, 3552, 2446, 1247
- };
-
private static readonly byte[] Limit = new byte[5 * (MaxJSample + 1)];
static IDCT()
@@ -81,15 +83,13 @@
/// 'Practical Fast 1-D DCT Algorithms with 11 Multiplications',
/// IEEE Intl. Conf. on Acoustics, Speech & Signal Processing, 1989, 988-991.
///
- /// The quantization tables
/// The fram component
/// The block buffer offset
/// The computational buffer for holding temp values
- public static void QuantizeAndInverse(QuantizationTables quantizationTables, ref FrameComponent component, int blockBufferOffset, Buffer computationBuffer)
+ /// The quantization table
+ public static void QuantizeAndInverse(ref FrameComponent component, int blockBufferOffset, ref Span computationBuffer, ref Span quantizationTable)
{
- Span qt = quantizationTables.Tables.GetRowSpan(component.QuantizationIdentifier);
Span blockData = component.BlockData.Slice(blockBufferOffset);
- Span computationBufferSpan = computationBuffer;
int v0, v1, v2, v3, v4, v5, v6, v7;
int p0, p1, p2, p3, p4, p5, p6, p7;
int t;
@@ -108,32 +108,32 @@
p7 = blockData[row + 7];
// dequant p0
- p0 *= qt[row];
+ p0 *= quantizationTable[row];
// check for all-zero AC coefficients
if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0)
{
t = ((DctSqrt2 * p0) + 512) >> 10;
short st = (short)t;
- computationBufferSpan[row] = st;
- computationBufferSpan[row + 1] = st;
- computationBufferSpan[row + 2] = st;
- computationBufferSpan[row + 3] = st;
- computationBufferSpan[row + 4] = st;
- computationBufferSpan[row + 5] = st;
- computationBufferSpan[row + 6] = st;
- computationBufferSpan[row + 7] = st;
+ computationBuffer[row] = st;
+ computationBuffer[row + 1] = st;
+ computationBuffer[row + 2] = st;
+ computationBuffer[row + 3] = st;
+ computationBuffer[row + 4] = st;
+ computationBuffer[row + 5] = st;
+ computationBuffer[row + 6] = st;
+ computationBuffer[row + 7] = st;
continue;
}
// dequant p1 ... p7
- p1 *= qt[row + 1];
- p2 *= qt[row + 2];
- p3 *= qt[row + 3];
- p4 *= qt[row + 4];
- p5 *= qt[row + 5];
- p6 *= qt[row + 6];
- p7 *= qt[row + 7];
+ p1 *= quantizationTable[row + 1];
+ p2 *= quantizationTable[row + 2];
+ p3 *= quantizationTable[row + 3];
+ p4 *= quantizationTable[row + 4];
+ p5 *= quantizationTable[row + 5];
+ p6 *= quantizationTable[row + 6];
+ p7 *= quantizationTable[row + 7];
// stage 4
v0 = ((DctSqrt2 * p0) + 128) >> 8;
@@ -169,27 +169,27 @@
v6 = t;
// stage 1
- computationBufferSpan[row] = (short)(v0 + v7);
- computationBufferSpan[row + 7] = (short)(v0 - v7);
- computationBufferSpan[row + 1] = (short)(v1 + v6);
- computationBufferSpan[row + 6] = (short)(v1 - v6);
- computationBufferSpan[row + 2] = (short)(v2 + v5);
- computationBufferSpan[row + 5] = (short)(v2 - v5);
- computationBufferSpan[row + 3] = (short)(v3 + v4);
- computationBufferSpan[row + 4] = (short)(v3 - v4);
+ computationBuffer[row] = (short)(v0 + v7);
+ computationBuffer[row + 7] = (short)(v0 - v7);
+ computationBuffer[row + 1] = (short)(v1 + v6);
+ computationBuffer[row + 6] = (short)(v1 - v6);
+ computationBuffer[row + 2] = (short)(v2 + v5);
+ computationBuffer[row + 5] = (short)(v2 - v5);
+ computationBuffer[row + 3] = (short)(v3 + v4);
+ computationBuffer[row + 4] = (short)(v3 - v4);
}
// inverse DCT on columns
for (int col = 0; col < 8; ++col)
{
- p0 = computationBufferSpan[col];
- p1 = computationBufferSpan[col + 8];
- p2 = computationBufferSpan[col + 16];
- p3 = computationBufferSpan[col + 24];
- p4 = computationBufferSpan[col + 32];
- p5 = computationBufferSpan[col + 40];
- p6 = computationBufferSpan[col + 48];
- p7 = computationBufferSpan[col + 56];
+ p0 = computationBuffer[col];
+ p1 = computationBuffer[col + 8];
+ p2 = computationBuffer[col + 16];
+ p3 = computationBuffer[col + 24];
+ p4 = computationBuffer[col + 32];
+ p5 = computationBuffer[col + 40];
+ p6 = computationBuffer[col + 48];
+ p7 = computationBuffer[col + 56];
// check for all-zero AC coefficients
if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0)
@@ -302,195 +302,188 @@
/// precise the scaled value, so this implementation does worse with high -
/// quality - setting files than with low - quality ones.
///
- /// The quantization tables
- /// The fram component
+ /// The frame component
/// The block buffer offset
/// The computational buffer for holding temp values
- public static void QuantizeAndInverseAlt(
- QuantizationTables quantizationTables,
- ref FrameComponent component,
- int blockBufferOffset,
- Buffer computationBuffer)
+ /// The multiplier table
+ public static void QuantizeAndInverseFast(ref FrameComponent component, int blockBufferOffset, ref Span computationBuffer, ref Span multiplierTable)
{
- Span qt = quantizationTables.Tables.GetRowSpan(component.QuantizationIdentifier);
Span blockData = component.BlockData.Slice(blockBufferOffset);
- Span computationBufferSpan = computationBuffer;
-
- // For AA&N IDCT method, multiplier are equal to quantization
- // coefficients scaled by scalefactor[row]*scalefactor[col], where
- // scalefactor[0] = 1
- // scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
- // For integer operation, the multiplier table is to be scaled by 14.
- using (var multiplier = new Buffer(64))
+ int p0, p1, p2, p3, p4, p5, p6, p7;
+
+ for (int col = 0; col < 8; col++)
{
- Span multiplierSpan = multiplier;
- for (int i = 0; i < 64; i++)
+ // Gather block data
+ p0 = blockData[col];
+ p1 = blockData[col + 8];
+ p2 = blockData[col + 16];
+ p3 = blockData[col + 24];
+ p4 = blockData[col + 32];
+ p5 = blockData[col + 40];
+ p6 = blockData[col + 48];
+ p7 = blockData[col + 56];
+
+ int tmp0 = p0 * multiplierTable[col];
+
+ // Due to quantization, we will usually find that many of the input
+ // coefficients are zero, especially the AC terms. We can exploit this
+ // by short-circuiting the IDCT calculation for any column in which all
+ // the AC terms are zero. In that case each output is equal to the
+ // DC coefficient (with scale factor as needed).
+ // With typical images and quantization tables, half or more of the
+ // column DCT calculations can be simplified this way.
+ if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0)
{
- multiplierSpan[i] = (short)Descale(qt[i] * Aanscales[i], 14 - Pass1Bits);
- }
+ short dcval = (short)tmp0;
- int p0, p1, p2, p3, p4, p5, p6, p7;
+ computationBuffer[col] = dcval;
+ computationBuffer[col + 8] = dcval;
+ computationBuffer[col + 16] = dcval;
+ computationBuffer[col + 24] = dcval;
+ computationBuffer[col + 32] = dcval;
+ computationBuffer[col + 40] = dcval;
+ computationBuffer[col + 48] = dcval;
+ computationBuffer[col + 56] = dcval;
- for (int col = 0; col < 8; col++)
- {
- // Gather block data
- p0 = blockData[col];
- p1 = blockData[col + 8];
- p2 = blockData[col + 16];
- p3 = blockData[col + 24];
- p4 = blockData[col + 32];
- p5 = blockData[col + 40];
- p6 = blockData[col + 48];
- p7 = blockData[col + 56];
-
- int tmp0 = p0 * multiplierSpan[col];
-
- // Due to quantization, we will usually find that many of the input
- // coefficients are zero, especially the AC terms. We can exploit this
- // by short-circuiting the IDCT calculation for any column in which all
- // the AC terms are zero. In that case each output is equal to the
- // DC coefficient (with scale factor as needed).
- // With typical images and quantization tables, half or more of the
- // column DCT calculations can be simplified this way.
- if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0)
- {
- short dcval = (short)tmp0;
-
- computationBufferSpan[col] = dcval;
- computationBufferSpan[col + 8] = dcval;
- computationBufferSpan[col + 16] = dcval;
- computationBufferSpan[col + 24] = dcval;
- computationBufferSpan[col + 32] = dcval;
- computationBufferSpan[col + 40] = dcval;
- computationBufferSpan[col + 48] = dcval;
- computationBufferSpan[col + 56] = dcval;
-
- continue;
- }
-
- // Even part
- int tmp1 = p2 * multiplierSpan[col + 16];
- int tmp2 = p4 * multiplierSpan[col + 32];
- int tmp3 = p6 * multiplierSpan[col + 48];
-
- int tmp10 = tmp0 + tmp2; // Phase 3
- int tmp11 = tmp0 - tmp2;
-
- int tmp13 = tmp1 + tmp3; // Phases 5-3
- int tmp12 = Multiply(tmp1 - tmp3, FIX_1_414213562) - tmp13; // 2*c4
-
- tmp0 = tmp10 + tmp13; // Phase 2
- tmp3 = tmp10 - tmp13;
- tmp1 = tmp11 + tmp12;
- tmp2 = tmp11 - tmp12;
-
- // Odd Part
- int tmp4 = p1 * multiplierSpan[col + 8];
- int tmp5 = p3 * multiplierSpan[col + 24];
- int tmp6 = p5 * multiplierSpan[col + 40];
- int tmp7 = p7 * multiplierSpan[col + 56];
-
- int z13 = tmp6 + tmp5; // Phase 6
- int z10 = tmp6 - tmp5;
- int z11 = tmp4 + tmp7;
- int z12 = tmp4 - tmp7;
-
- tmp7 = z11 + z13; // Phase 5
- tmp11 = Multiply(z11 - z13, FIX_1_414213562); // 2*c4
-
- int z5 = Multiply(z10 + z12, FIX_1_847759065); // 2*c2
- tmp10 = z5 - Multiply(z12, FIX_1_082392200); // 2*(c2-c6)
- tmp12 = z5 - Multiply(z10, FIX_2_613125930); // 2*(c2+c6)
-
- tmp6 = tmp12 - tmp7; // Phase 2
- tmp5 = tmp11 - tmp6;
- tmp4 = tmp10 - tmp5;
-
- computationBufferSpan[col] = (short)(tmp0 + tmp7);
- computationBufferSpan[col + 56] = (short)(tmp0 - tmp7);
- computationBufferSpan[col + 8] = (short)(tmp1 + tmp6);
- computationBufferSpan[col + 48] = (short)(tmp1 - tmp6);
- computationBufferSpan[col + 16] = (short)(tmp2 + tmp5);
- computationBufferSpan[col + 40] = (short)(tmp2 - tmp5);
- computationBufferSpan[col + 24] = (short)(tmp3 + tmp4);
- computationBufferSpan[col + 32] = (short)(tmp3 - tmp4);
+ continue;
}
- // Pass 2: process rows from work array, store into output array.
- // Note that we must descale the results by a factor of 8 == 2**3,
- // and also undo the pass 1 bits scaling.
- for (int row = 0; row < 64; row += 8)
+ // Even part
+ int tmp1 = p2 * multiplierTable[col + 16];
+ int tmp2 = p4 * multiplierTable[col + 32];
+ int tmp3 = p6 * multiplierTable[col + 48];
+
+ int tmp10 = tmp0 + tmp2; // Phase 3
+ int tmp11 = tmp0 - tmp2;
+
+ int tmp13 = tmp1 + tmp3; // Phases 5-3
+ int tmp12 = Multiply(tmp1 - tmp3, FIX_1_414213562) - tmp13; // 2*c4
+
+ tmp0 = tmp10 + tmp13; // Phase 2
+ tmp3 = tmp10 - tmp13;
+ tmp1 = tmp11 + tmp12;
+ tmp2 = tmp11 - tmp12;
+
+ // Odd Part
+ int tmp4 = p1 * multiplierTable[col + 8];
+ int tmp5 = p3 * multiplierTable[col + 24];
+ int tmp6 = p5 * multiplierTable[col + 40];
+ int tmp7 = p7 * multiplierTable[col + 56];
+
+ int z13 = tmp6 + tmp5; // Phase 6
+ int z10 = tmp6 - tmp5;
+ int z11 = tmp4 + tmp7;
+ int z12 = tmp4 - tmp7;
+
+ tmp7 = z11 + z13; // Phase 5
+ tmp11 = Multiply(z11 - z13, FIX_1_414213562); // 2*c4
+
+ int z5 = Multiply(z10 + z12, FIX_1_847759065); // 2*c2
+ tmp10 = z5 - Multiply(z12, FIX_1_082392200); // 2*(c2-c6)
+ tmp12 = z5 - Multiply(z10, FIX_2_613125930); // 2*(c2+c6)
+
+ tmp6 = tmp12 - tmp7; // Phase 2
+ tmp5 = tmp11 - tmp6;
+ tmp4 = tmp10 - tmp5;
+
+ computationBuffer[col] = (short)(tmp0 + tmp7);
+ computationBuffer[col + 56] = (short)(tmp0 - tmp7);
+ computationBuffer[col + 8] = (short)(tmp1 + tmp6);
+ computationBuffer[col + 48] = (short)(tmp1 - tmp6);
+ computationBuffer[col + 16] = (short)(tmp2 + tmp5);
+ computationBuffer[col + 40] = (short)(tmp2 - tmp5);
+ computationBuffer[col + 24] = (short)(tmp3 + tmp4);
+ computationBuffer[col + 32] = (short)(tmp3 - tmp4);
+ }
+
+ // Pass 2: process rows from work array, store into output array.
+ // Note that we must descale the results by a factor of 8 == 2**3,
+ // and also undo the pass 1 bits scaling.
+ for (int row = 0; row < 64; row += 8)
+ {
+ p1 = computationBuffer[row + 1];
+ p2 = computationBuffer[row + 2];
+ p3 = computationBuffer[row + 3];
+ p4 = computationBuffer[row + 4];
+ p5 = computationBuffer[row + 5];
+ p6 = computationBuffer[row + 6];
+ p7 = computationBuffer[row + 7];
+
+ // Add range center and fudge factor for final descale and range-limit.
+ int z5 = computationBuffer[row] + (RangeCenter << (Pass1Bits + 3)) + (1 << (Pass1Bits + 2));
+
+ // Check for all-zero AC coefficients
+ if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0)
{
- p1 = computationBufferSpan[row + 1];
- p2 = computationBufferSpan[row + 2];
- p3 = computationBufferSpan[row + 3];
- p4 = computationBufferSpan[row + 4];
- p5 = computationBufferSpan[row + 5];
- p6 = computationBufferSpan[row + 6];
- p7 = computationBufferSpan[row + 7];
-
- // Add range center and fudge factor for final descale and range-limit.
- int z5 = computationBufferSpan[row] + (RangeCenter << (Pass1Bits + 3)) + (1 << (Pass1Bits + 2));
-
- // Check for all-zero AC coefficients
- if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0)
- {
- byte dcval = Limit[LimitOffset + (RightShift(z5, Pass1Bits + 3) & RangeMask)];
-
- blockData[row] = dcval;
- blockData[row + 1] = dcval;
- blockData[row + 2] = dcval;
- blockData[row + 3] = dcval;
- blockData[row + 4] = dcval;
- blockData[row + 5] = dcval;
- blockData[row + 6] = dcval;
- blockData[row + 7] = dcval;
-
- continue;
- }
-
- // Even part
- int tmp10 = z5 + p4;
- int tmp11 = z5 - p4;
-
- int tmp13 = p2 + p6;
- int tmp12 = Multiply(p2 - p6, FIX_1_414213562) - tmp13; // 2*c4
-
- int tmp0 = tmp10 + tmp13;
- int tmp3 = tmp10 - tmp13;
- int tmp1 = tmp11 + tmp12;
- int tmp2 = tmp11 - tmp12;
-
- // Odd part
- int z13 = p5 + p3;
- int z10 = p5 - p3;
- int z11 = p1 + p7;
- int z12 = p1 - p7;
-
- int tmp7 = z11 + z13; // Phase 5
- tmp11 = Multiply(z11 - z13, FIX_1_414213562); // 2*c4
-
- z5 = Multiply(z10 + z12, FIX_1_847759065); // 2*c2
- tmp10 = z5 - Multiply(z12, FIX_1_082392200); // 2*(c2-c6)
- tmp12 = z5 - Multiply(z10, FIX_2_613125930); // 2*(c2+c6)
-
- int tmp6 = tmp12 - tmp7; // Phase 2
- int tmp5 = tmp11 - tmp6;
- int tmp4 = tmp10 - tmp5;
-
- // Final output stage: scale down by a factor of 8, offset, and range-limit
- blockData[row] = Limit[LimitOffset + (RightShift(tmp0 + tmp7, Pass1Bits + 3) & RangeMask)];
- blockData[row + 7] = Limit[LimitOffset + (RightShift(tmp0 - tmp7, Pass1Bits + 3) & RangeMask)];
- blockData[row + 1] = Limit[LimitOffset + (RightShift(tmp1 + tmp6, Pass1Bits + 3) & RangeMask)];
- blockData[row + 6] = Limit[LimitOffset + (RightShift(tmp1 - tmp6, Pass1Bits + 3) & RangeMask)];
- blockData[row + 2] = Limit[LimitOffset + (RightShift(tmp2 + tmp5, Pass1Bits + 3) & RangeMask)];
- blockData[row + 5] = Limit[LimitOffset + (RightShift(tmp2 - tmp5, Pass1Bits + 3) & RangeMask)];
- blockData[row + 3] = Limit[LimitOffset + (RightShift(tmp3 + tmp4, Pass1Bits + 3) & RangeMask)];
- blockData[row + 4] = Limit[LimitOffset + (RightShift(tmp3 - tmp4, Pass1Bits + 3) & RangeMask)];
+ byte dcval = Limit[LimitOffset + (RightShift(z5, Pass1Bits + 3) & RangeMask)];
+
+ blockData[row] = dcval;
+ blockData[row + 1] = dcval;
+ blockData[row + 2] = dcval;
+ blockData[row + 3] = dcval;
+ blockData[row + 4] = dcval;
+ blockData[row + 5] = dcval;
+ blockData[row + 6] = dcval;
+ blockData[row + 7] = dcval;
+
+ continue;
}
+
+ // Even part
+ int tmp10 = z5 + p4;
+ int tmp11 = z5 - p4;
+
+ int tmp13 = p2 + p6;
+ int tmp12 = Multiply(p2 - p6, FIX_1_414213562) - tmp13; // 2*c4
+
+ int tmp0 = tmp10 + tmp13;
+ int tmp3 = tmp10 - tmp13;
+ int tmp1 = tmp11 + tmp12;
+ int tmp2 = tmp11 - tmp12;
+
+ // Odd part
+ int z13 = p5 + p3;
+ int z10 = p5 - p3;
+ int z11 = p1 + p7;
+ int z12 = p1 - p7;
+
+ int tmp7 = z11 + z13; // Phase 5
+ tmp11 = Multiply(z11 - z13, FIX_1_414213562); // 2*c4
+
+ z5 = Multiply(z10 + z12, FIX_1_847759065); // 2*c2
+ tmp10 = z5 - Multiply(z12, FIX_1_082392200); // 2*(c2-c6)
+ tmp12 = z5 - Multiply(z10, FIX_2_613125930); // 2*(c2+c6)
+
+ int tmp6 = tmp12 - tmp7; // Phase 2
+ int tmp5 = tmp11 - tmp6;
+ int tmp4 = tmp10 - tmp5;
+
+ // Final output stage: scale down by a factor of 8, offset, and range-limit
+ blockData[row] = Limit[LimitOffset + (RightShift(tmp0 + tmp7, Pass1Bits + 3) & RangeMask)];
+ blockData[row + 7] = Limit[LimitOffset + (RightShift(tmp0 - tmp7, Pass1Bits + 3) & RangeMask)];
+ blockData[row + 1] = Limit[LimitOffset + (RightShift(tmp1 + tmp6, Pass1Bits + 3) & RangeMask)];
+ blockData[row + 6] = Limit[LimitOffset + (RightShift(tmp1 - tmp6, Pass1Bits + 3) & RangeMask)];
+ blockData[row + 2] = Limit[LimitOffset + (RightShift(tmp2 + tmp5, Pass1Bits + 3) & RangeMask)];
+ blockData[row + 5] = Limit[LimitOffset + (RightShift(tmp2 - tmp5, Pass1Bits + 3) & RangeMask)];
+ blockData[row + 3] = Limit[LimitOffset + (RightShift(tmp3 + tmp4, Pass1Bits + 3) & RangeMask)];
+ blockData[row + 4] = Limit[LimitOffset + (RightShift(tmp3 - tmp4, Pass1Bits + 3) & RangeMask)];
}
}
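+ /// <summary>
+ /// Descale and correctly round an int value that's scaled by <paramref name="n"/> bits.
+ /// We assume <see cref="RightShift"/> rounds towards minus infinity, so adding
+ /// the fudge factor is correct for either sign of <paramref name="value"/>.
+ /// </summary>
+ /// <param name="value">The value</param>
+ /// <param name="n">The number of bits</param>
+ /// <returns>The <see cref="int"/></returns>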
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int Descale(int value, int n)
+ {
+ return RightShift(value + (1 << (n - 1)), n);
+ }
+
///
/// Multiply a variable by an int constant, and immediately descale.
///
@@ -514,19 +507,5 @@
{
return value >> shift;
}
-
- ///
- /// Descale and correctly round an int value that's scaled by bits.
- /// We assume rounds towards minus infinity, so adding
- /// the fudge factor is correct for either sign of .
- ///
- /// The value
- /// The number of bits
- /// The
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static int Descale(int value, int n)
- {
- return RightShift(value + (1 << (n - 1)), n);
- }
}
}
\ No newline at end of file
diff --git a/src/ImageSharp/Formats/Jpeg/Port/JpegDecoderCore.cs b/src/ImageSharp/Formats/Jpeg/Port/JpegDecoderCore.cs
index ef49dfaf06..074ee3cfdc 100644
--- a/src/ImageSharp/Formats/Jpeg/Port/JpegDecoderCore.cs
+++ b/src/ImageSharp/Formats/Jpeg/Port/JpegDecoderCore.cs
@@ -792,13 +792,28 @@ namespace ImageSharp.Formats.Jpeg.Port
int blocksPerLine = component.BlocksPerLine;
int blocksPerColumn = component.BlocksPerColumn;
using (var computationBuffer = Buffer.CreateClean(64))
- {
+ using (var multiplicationBuffer = Buffer.CreateClean(64))
+ {
+ Span quantizationTable = this.quantizationTables.Tables.GetRowSpan(frameComponent.QuantizationIdentifier);
+ Span computationBufferSpan = computationBuffer;
+
+ // For the AA&N IDCT method, the multipliers are equal to the quantization
+ // coefficients scaled by scalefactor[row]*scalefactor[col], where
+ // scalefactor[0] = 1
+ // scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
+ // For integer operation, the Aanscales entries are scaled up by 14 bits, so each product is descaled by 12 bits.
+ Span multiplierSpan = multiplicationBuffer;
+ for (int i = 0; i < 64; i++)
+ {
+ multiplierSpan[i] = (short)IDCT.Descale(quantizationTable[i] * IDCT.Aanscales[i], 12);
+ }
+
for (int blockRow = 0; blockRow < blocksPerColumn; blockRow++)
{
for (int blockCol = 0; blockCol < blocksPerLine; blockCol++)
{
int offset = GetBlockBufferOffset(ref component, blockRow, blockCol);
- IDCT.QuantizeAndInverseAlt(this.quantizationTables, ref frameComponent, offset, computationBuffer);
+ IDCT.QuantizeAndInverseFast(ref frameComponent, offset, ref computationBufferSpan, ref multiplierSpan);
}
}
}
@@ -808,7 +823,6 @@ namespace ImageSharp.Formats.Jpeg.Port
///
/// Builds the huffman tables
- /// TODO: This is our bottleneck. We should use a faster algorithm with a LUT.
///
/// The tables
/// The table index