|
|
|
@ -6,24 +6,38 @@ |
|
|
|
using ImageSharp.Memory; |
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Performa the invers
|
|
|
|
/// Performs the inverse Discrete Cosine Transform on each frame component.
|
|
|
|
/// </summary>
|
|
|
|
internal static class IDCT |
|
|
|
{ |
|
|
|
private const int DctCos1 = 4017; // cos(pi/16)
|
|
|
|
private const int DctSin1 = 799; // sin(pi/16)
|
|
|
|
private const int DctCos3 = 3406; // cos(3*pi/16)
|
|
|
|
private const int DctSin3 = 2276; // sin(3*pi/16)
|
|
|
|
private const int DctCos6 = 1567; // cos(6*pi/16)
|
|
|
|
private const int DctSin6 = 3784; // sin(6*pi/16)
|
|
|
|
private const int DctSqrt2 = 5793; // sqrt(2)
|
|
|
|
/// <summary>
|
|
|
|
/// Precomputed values scaled up by 14 bits
|
|
|
|
/// </summary>
|
|
|
|
public static readonly short[] Aanscales = |
|
|
|
{ |
|
|
|
16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 22725, 31521, 29692, 26722, 22725, 17855, |
|
|
|
12299, 6270, 21407, 29692, 27969, 25172, 21407, 16819, 11585, |
|
|
|
5906, 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, |
|
|
|
16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 12873, |
|
|
|
17855, 16819, 15137, 12873, 10114, 6967, 3552, 8867, 12299, |
|
|
|
11585, 10426, 8867, 6967, 4799, 2446, 4520, 6270, 5906, 5315, |
|
|
|
4520, 3552, 2446, 1247 |
|
|
|
}; |
|
|
|
|
|
|
|
private const int DctCos1 = 4017; // cos(pi/16)
|
|
|
|
private const int DctSin1 = 799; // sin(pi/16)
|
|
|
|
private const int DctCos3 = 3406; // cos(3*pi/16)
|
|
|
|
private const int DctSin3 = 2276; // sin(3*pi/16)
|
|
|
|
private const int DctCos6 = 1567; // cos(6*pi/16)
|
|
|
|
private const int DctSin6 = 3784; // sin(6*pi/16)
|
|
|
|
private const int DctSqrt2 = 5793; // sqrt(2)
|
|
|
|
private const int DctSqrt1D2 = 2896; // sqrt(2) / 2
|
|
|
|
|
|
|
|
#pragma warning disable SA1310 // Field names must not contain underscore
|
|
|
|
private const int FIX_1_082392200 = 277; /* FIX(1.082392200) */ |
|
|
|
private const int FIX_1_414213562 = 362; /* FIX(1.414213562) */ |
|
|
|
private const int FIX_1_847759065 = 473; /* FIX(1.847759065) */ |
|
|
|
private const int FIX_2_613125930 = 669; /* FIX(2.613125930) */ |
|
|
|
private const int FIX_1_082392200 = 277; // FIX(1.082392200)
|
|
|
|
private const int FIX_1_414213562 = 362; // FIX(1.414213562)
|
|
|
|
private const int FIX_1_847759065 = 473; // FIX(1.847759065)
|
|
|
|
private const int FIX_2_613125930 = 669; // FIX(2.613125930)
|
|
|
|
#pragma warning restore SA1310 // Field names must not contain underscore
|
|
|
|
|
|
|
|
private const int ConstBits = 8; |
|
|
|
@ -42,21 +56,9 @@ |
|
|
|
// be quite far out of range if the input data is corrupt, so a bulletproof
|
|
|
|
// range-limiting step is required. We use a mask-and-table-lookup method
|
|
|
|
// to do the combined operations quickly, assuming that MaxJSample+1
|
|
|
|
// is a power of 2. See the comments with prepare_range_limit_table for more info.
|
|
|
|
// is a power of 2.
|
|
|
|
private const int RangeMask = (MaxJSample * 4) + 3; // 2 bits wider than legal samples
|
|
|
|
|
|
|
|
// Precomputed values scaled up by 14 bits
|
|
|
|
private static readonly short[] Aanscales = |
|
|
|
{ |
|
|
|
16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 22725, 31521, 29692, 26722, 22725, 17855, |
|
|
|
12299, 6270, 21407, 29692, 27969, 25172, 21407, 16819, 11585, |
|
|
|
5906, 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, |
|
|
|
16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 12873, |
|
|
|
17855, 16819, 15137, 12873, 10114, 6967, 3552, 8867, 12299, |
|
|
|
11585, 10426, 8867, 6967, 4799, 2446, 4520, 6270, 5906, 5315, |
|
|
|
4520, 3552, 2446, 1247 |
|
|
|
}; |
|
|
|
|
|
|
|
private static readonly byte[] Limit = new byte[5 * (MaxJSample + 1)]; |
|
|
|
|
|
|
|
static IDCT() |
|
|
|
@ -81,15 +83,13 @@ |
|
|
|
/// 'Practical Fast 1-D DCT Algorithms with 11 Multiplications',
|
|
|
|
/// IEEE Intl. Conf. on Acoustics, Speech & Signal Processing, 1989, 988-991.
|
|
|
|
/// </summary>
|
|
|
|
/// <param name="quantizationTables">The quantization tables</param>
|
|
|
|
/// <param name="component">The frame component</param>
|
|
|
|
/// <param name="blockBufferOffset">The block buffer offset</param>
|
|
|
|
/// <param name="computationBuffer">The computational buffer for holding temp values</param>
|
|
|
|
public static void QuantizeAndInverse(QuantizationTables quantizationTables, ref FrameComponent component, int blockBufferOffset, Buffer<short> computationBuffer) |
|
|
|
/// <param name="quantizationTable">The quantization table</param>
|
|
|
|
public static void QuantizeAndInverse(ref FrameComponent component, int blockBufferOffset, ref Span<short> computationBuffer, ref Span<short> quantizationTable) |
|
|
|
{ |
|
|
|
Span<short> qt = quantizationTables.Tables.GetRowSpan(component.QuantizationIdentifier); |
|
|
|
Span<short> blockData = component.BlockData.Slice(blockBufferOffset); |
|
|
|
Span<short> computationBufferSpan = computationBuffer; |
|
|
|
int v0, v1, v2, v3, v4, v5, v6, v7; |
|
|
|
int p0, p1, p2, p3, p4, p5, p6, p7; |
|
|
|
int t; |
|
|
|
@ -108,32 +108,32 @@ |
|
|
|
p7 = blockData[row + 7]; |
|
|
|
|
|
|
|
// dequant p0
|
|
|
|
p0 *= qt[row]; |
|
|
|
p0 *= quantizationTable[row]; |
|
|
|
|
|
|
|
// check for all-zero AC coefficients
|
|
|
|
if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0) |
|
|
|
{ |
|
|
|
t = ((DctSqrt2 * p0) + 512) >> 10; |
|
|
|
short st = (short)t; |
|
|
|
computationBufferSpan[row] = st; |
|
|
|
computationBufferSpan[row + 1] = st; |
|
|
|
computationBufferSpan[row + 2] = st; |
|
|
|
computationBufferSpan[row + 3] = st; |
|
|
|
computationBufferSpan[row + 4] = st; |
|
|
|
computationBufferSpan[row + 5] = st; |
|
|
|
computationBufferSpan[row + 6] = st; |
|
|
|
computationBufferSpan[row + 7] = st; |
|
|
|
computationBuffer[row] = st; |
|
|
|
computationBuffer[row + 1] = st; |
|
|
|
computationBuffer[row + 2] = st; |
|
|
|
computationBuffer[row + 3] = st; |
|
|
|
computationBuffer[row + 4] = st; |
|
|
|
computationBuffer[row + 5] = st; |
|
|
|
computationBuffer[row + 6] = st; |
|
|
|
computationBuffer[row + 7] = st; |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
// dequant p1 ... p7
|
|
|
|
p1 *= qt[row + 1]; |
|
|
|
p2 *= qt[row + 2]; |
|
|
|
p3 *= qt[row + 3]; |
|
|
|
p4 *= qt[row + 4]; |
|
|
|
p5 *= qt[row + 5]; |
|
|
|
p6 *= qt[row + 6]; |
|
|
|
p7 *= qt[row + 7]; |
|
|
|
p1 *= quantizationTable[row + 1]; |
|
|
|
p2 *= quantizationTable[row + 2]; |
|
|
|
p3 *= quantizationTable[row + 3]; |
|
|
|
p4 *= quantizationTable[row + 4]; |
|
|
|
p5 *= quantizationTable[row + 5]; |
|
|
|
p6 *= quantizationTable[row + 6]; |
|
|
|
p7 *= quantizationTable[row + 7]; |
|
|
|
|
|
|
|
// stage 4
|
|
|
|
v0 = ((DctSqrt2 * p0) + 128) >> 8; |
|
|
|
@ -169,27 +169,27 @@ |
|
|
|
v6 = t; |
|
|
|
|
|
|
|
// stage 1
|
|
|
|
computationBufferSpan[row] = (short)(v0 + v7); |
|
|
|
computationBufferSpan[row + 7] = (short)(v0 - v7); |
|
|
|
computationBufferSpan[row + 1] = (short)(v1 + v6); |
|
|
|
computationBufferSpan[row + 6] = (short)(v1 - v6); |
|
|
|
computationBufferSpan[row + 2] = (short)(v2 + v5); |
|
|
|
computationBufferSpan[row + 5] = (short)(v2 - v5); |
|
|
|
computationBufferSpan[row + 3] = (short)(v3 + v4); |
|
|
|
computationBufferSpan[row + 4] = (short)(v3 - v4); |
|
|
|
computationBuffer[row] = (short)(v0 + v7); |
|
|
|
computationBuffer[row + 7] = (short)(v0 - v7); |
|
|
|
computationBuffer[row + 1] = (short)(v1 + v6); |
|
|
|
computationBuffer[row + 6] = (short)(v1 - v6); |
|
|
|
computationBuffer[row + 2] = (short)(v2 + v5); |
|
|
|
computationBuffer[row + 5] = (short)(v2 - v5); |
|
|
|
computationBuffer[row + 3] = (short)(v3 + v4); |
|
|
|
computationBuffer[row + 4] = (short)(v3 - v4); |
|
|
|
} |
|
|
|
|
|
|
|
// inverse DCT on columns
|
|
|
|
for (int col = 0; col < 8; ++col) |
|
|
|
{ |
|
|
|
p0 = computationBufferSpan[col]; |
|
|
|
p1 = computationBufferSpan[col + 8]; |
|
|
|
p2 = computationBufferSpan[col + 16]; |
|
|
|
p3 = computationBufferSpan[col + 24]; |
|
|
|
p4 = computationBufferSpan[col + 32]; |
|
|
|
p5 = computationBufferSpan[col + 40]; |
|
|
|
p6 = computationBufferSpan[col + 48]; |
|
|
|
p7 = computationBufferSpan[col + 56]; |
|
|
|
p0 = computationBuffer[col]; |
|
|
|
p1 = computationBuffer[col + 8]; |
|
|
|
p2 = computationBuffer[col + 16]; |
|
|
|
p3 = computationBuffer[col + 24]; |
|
|
|
p4 = computationBuffer[col + 32]; |
|
|
|
p5 = computationBuffer[col + 40]; |
|
|
|
p6 = computationBuffer[col + 48]; |
|
|
|
p7 = computationBuffer[col + 56]; |
|
|
|
|
|
|
|
// check for all-zero AC coefficients
|
|
|
|
if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0) |
|
|
|
@ -302,195 +302,188 @@ |
|
|
|
/// precise the scaled value, so this implementation does worse with high -
|
|
|
|
/// quality - setting files than with low - quality ones.
|
|
|
|
/// </summary>
|
|
|
|
/// <param name="quantizationTables">The quantization tables</param>
|
|
|
|
/// <param name="component">The fram component</param>
|
|
|
|
/// <param name="component">The frame component</param>
|
|
|
|
/// <param name="blockBufferOffset">The block buffer offset</param>
|
|
|
|
/// <param name="computationBuffer">The computational buffer for holding temp values</param>
|
|
|
|
public static void QuantizeAndInverseAlt( |
|
|
|
QuantizationTables quantizationTables, |
|
|
|
ref FrameComponent component, |
|
|
|
int blockBufferOffset, |
|
|
|
Buffer<short> computationBuffer) |
|
|
|
/// <param name="multiplierTable">The multiplier table</param>
|
|
|
|
public static void QuantizeAndInverseFast(ref FrameComponent component, int blockBufferOffset, ref Span<short> computationBuffer, ref Span<short> multiplierTable) |
|
|
|
{ |
|
|
|
Span<short> qt = quantizationTables.Tables.GetRowSpan(component.QuantizationIdentifier); |
|
|
|
Span<short> blockData = component.BlockData.Slice(blockBufferOffset); |
|
|
|
Span<short> computationBufferSpan = computationBuffer; |
|
|
|
|
|
|
|
// For AA&N IDCT method, multiplier are equal to quantization
|
|
|
|
// coefficients scaled by scalefactor[row]*scalefactor[col], where
|
|
|
|
// scalefactor[0] = 1
|
|
|
|
// scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
|
|
|
|
// For integer operation, the multiplier table is to be scaled by 14.
|
|
|
|
using (var multiplier = new Buffer<short>(64)) |
|
|
|
int p0, p1, p2, p3, p4, p5, p6, p7; |
|
|
|
|
|
|
|
for (int col = 0; col < 8; col++) |
|
|
|
{ |
|
|
|
Span<short> multiplierSpan = multiplier; |
|
|
|
for (int i = 0; i < 64; i++) |
|
|
|
// Gather block data
|
|
|
|
p0 = blockData[col]; |
|
|
|
p1 = blockData[col + 8]; |
|
|
|
p2 = blockData[col + 16]; |
|
|
|
p3 = blockData[col + 24]; |
|
|
|
p4 = blockData[col + 32]; |
|
|
|
p5 = blockData[col + 40]; |
|
|
|
p6 = blockData[col + 48]; |
|
|
|
p7 = blockData[col + 56]; |
|
|
|
|
|
|
|
int tmp0 = p0 * multiplierTable[col]; |
|
|
|
|
|
|
|
// Due to quantization, we will usually find that many of the input
|
|
|
|
// coefficients are zero, especially the AC terms. We can exploit this
|
|
|
|
// by short-circuiting the IDCT calculation for any column in which all
|
|
|
|
// the AC terms are zero. In that case each output is equal to the
|
|
|
|
// DC coefficient (with scale factor as needed).
|
|
|
|
// With typical images and quantization tables, half or more of the
|
|
|
|
// column DCT calculations can be simplified this way.
|
|
|
|
if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0) |
|
|
|
{ |
|
|
|
multiplierSpan[i] = (short)Descale(qt[i] * Aanscales[i], 14 - Pass1Bits); |
|
|
|
} |
|
|
|
short dcval = (short)tmp0; |
|
|
|
|
|
|
|
int p0, p1, p2, p3, p4, p5, p6, p7; |
|
|
|
computationBuffer[col] = dcval; |
|
|
|
computationBuffer[col + 8] = dcval; |
|
|
|
computationBuffer[col + 16] = dcval; |
|
|
|
computationBuffer[col + 24] = dcval; |
|
|
|
computationBuffer[col + 32] = dcval; |
|
|
|
computationBuffer[col + 40] = dcval; |
|
|
|
computationBuffer[col + 48] = dcval; |
|
|
|
computationBuffer[col + 56] = dcval; |
|
|
|
|
|
|
|
for (int col = 0; col < 8; col++) |
|
|
|
{ |
|
|
|
// Gather block data
|
|
|
|
p0 = blockData[col]; |
|
|
|
p1 = blockData[col + 8]; |
|
|
|
p2 = blockData[col + 16]; |
|
|
|
p3 = blockData[col + 24]; |
|
|
|
p4 = blockData[col + 32]; |
|
|
|
p5 = blockData[col + 40]; |
|
|
|
p6 = blockData[col + 48]; |
|
|
|
p7 = blockData[col + 56]; |
|
|
|
|
|
|
|
int tmp0 = p0 * multiplierSpan[col]; |
|
|
|
|
|
|
|
// Due to quantization, we will usually find that many of the input
|
|
|
|
// coefficients are zero, especially the AC terms. We can exploit this
|
|
|
|
// by short-circuiting the IDCT calculation for any column in which all
|
|
|
|
// the AC terms are zero. In that case each output is equal to the
|
|
|
|
// DC coefficient (with scale factor as needed).
|
|
|
|
// With typical images and quantization tables, half or more of the
|
|
|
|
// column DCT calculations can be simplified this way.
|
|
|
|
if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0) |
|
|
|
{ |
|
|
|
short dcval = (short)tmp0; |
|
|
|
|
|
|
|
computationBufferSpan[col] = dcval; |
|
|
|
computationBufferSpan[col + 8] = dcval; |
|
|
|
computationBufferSpan[col + 16] = dcval; |
|
|
|
computationBufferSpan[col + 24] = dcval; |
|
|
|
computationBufferSpan[col + 32] = dcval; |
|
|
|
computationBufferSpan[col + 40] = dcval; |
|
|
|
computationBufferSpan[col + 48] = dcval; |
|
|
|
computationBufferSpan[col + 56] = dcval; |
|
|
|
|
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
// Even part
|
|
|
|
int tmp1 = p2 * multiplierSpan[col + 16]; |
|
|
|
int tmp2 = p4 * multiplierSpan[col + 32]; |
|
|
|
int tmp3 = p6 * multiplierSpan[col + 48]; |
|
|
|
|
|
|
|
int tmp10 = tmp0 + tmp2; // Phase 3
|
|
|
|
int tmp11 = tmp0 - tmp2; |
|
|
|
|
|
|
|
int tmp13 = tmp1 + tmp3; // Phases 5-3
|
|
|
|
int tmp12 = Multiply(tmp1 - tmp3, FIX_1_414213562) - tmp13; // 2*c4
|
|
|
|
|
|
|
|
tmp0 = tmp10 + tmp13; // Phase 2
|
|
|
|
tmp3 = tmp10 - tmp13; |
|
|
|
tmp1 = tmp11 + tmp12; |
|
|
|
tmp2 = tmp11 - tmp12; |
|
|
|
|
|
|
|
// Odd Part
|
|
|
|
int tmp4 = p1 * multiplierSpan[col + 8]; |
|
|
|
int tmp5 = p3 * multiplierSpan[col + 24]; |
|
|
|
int tmp6 = p5 * multiplierSpan[col + 40]; |
|
|
|
int tmp7 = p7 * multiplierSpan[col + 56]; |
|
|
|
|
|
|
|
int z13 = tmp6 + tmp5; // Phase 6
|
|
|
|
int z10 = tmp6 - tmp5; |
|
|
|
int z11 = tmp4 + tmp7; |
|
|
|
int z12 = tmp4 - tmp7; |
|
|
|
|
|
|
|
tmp7 = z11 + z13; // Phase 5
|
|
|
|
tmp11 = Multiply(z11 - z13, FIX_1_414213562); // 2*c4
|
|
|
|
|
|
|
|
int z5 = Multiply(z10 + z12, FIX_1_847759065); // 2*c2
|
|
|
|
tmp10 = z5 - Multiply(z12, FIX_1_082392200); // 2*(c2-c6)
|
|
|
|
tmp12 = z5 - Multiply(z10, FIX_2_613125930); // 2*(c2+c6)
|
|
|
|
|
|
|
|
tmp6 = tmp12 - tmp7; // Phase 2
|
|
|
|
tmp5 = tmp11 - tmp6; |
|
|
|
tmp4 = tmp10 - tmp5; |
|
|
|
|
|
|
|
computationBufferSpan[col] = (short)(tmp0 + tmp7); |
|
|
|
computationBufferSpan[col + 56] = (short)(tmp0 - tmp7); |
|
|
|
computationBufferSpan[col + 8] = (short)(tmp1 + tmp6); |
|
|
|
computationBufferSpan[col + 48] = (short)(tmp1 - tmp6); |
|
|
|
computationBufferSpan[col + 16] = (short)(tmp2 + tmp5); |
|
|
|
computationBufferSpan[col + 40] = (short)(tmp2 - tmp5); |
|
|
|
computationBufferSpan[col + 24] = (short)(tmp3 + tmp4); |
|
|
|
computationBufferSpan[col + 32] = (short)(tmp3 - tmp4); |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
// Pass 2: process rows from work array, store into output array.
|
|
|
|
// Note that we must descale the results by a factor of 8 == 2**3,
|
|
|
|
// and also undo the pass 1 bits scaling.
|
|
|
|
for (int row = 0; row < 64; row += 8) |
|
|
|
// Even part
|
|
|
|
int tmp1 = p2 * multiplierTable[col + 16]; |
|
|
|
int tmp2 = p4 * multiplierTable[col + 32]; |
|
|
|
int tmp3 = p6 * multiplierTable[col + 48]; |
|
|
|
|
|
|
|
int tmp10 = tmp0 + tmp2; // Phase 3
|
|
|
|
int tmp11 = tmp0 - tmp2; |
|
|
|
|
|
|
|
int tmp13 = tmp1 + tmp3; // Phases 5-3
|
|
|
|
int tmp12 = Multiply(tmp1 - tmp3, FIX_1_414213562) - tmp13; // 2*c4
|
|
|
|
|
|
|
|
tmp0 = tmp10 + tmp13; // Phase 2
|
|
|
|
tmp3 = tmp10 - tmp13; |
|
|
|
tmp1 = tmp11 + tmp12; |
|
|
|
tmp2 = tmp11 - tmp12; |
|
|
|
|
|
|
|
// Odd Part
|
|
|
|
int tmp4 = p1 * multiplierTable[col + 8]; |
|
|
|
int tmp5 = p3 * multiplierTable[col + 24]; |
|
|
|
int tmp6 = p5 * multiplierTable[col + 40]; |
|
|
|
int tmp7 = p7 * multiplierTable[col + 56]; |
|
|
|
|
|
|
|
int z13 = tmp6 + tmp5; // Phase 6
|
|
|
|
int z10 = tmp6 - tmp5; |
|
|
|
int z11 = tmp4 + tmp7; |
|
|
|
int z12 = tmp4 - tmp7; |
|
|
|
|
|
|
|
tmp7 = z11 + z13; // Phase 5
|
|
|
|
tmp11 = Multiply(z11 - z13, FIX_1_414213562); // 2*c4
|
|
|
|
|
|
|
|
int z5 = Multiply(z10 + z12, FIX_1_847759065); // 2*c2
|
|
|
|
tmp10 = z5 - Multiply(z12, FIX_1_082392200); // 2*(c2-c6)
|
|
|
|
tmp12 = z5 - Multiply(z10, FIX_2_613125930); // 2*(c2+c6)
|
|
|
|
|
|
|
|
tmp6 = tmp12 - tmp7; // Phase 2
|
|
|
|
tmp5 = tmp11 - tmp6; |
|
|
|
tmp4 = tmp10 - tmp5; |
|
|
|
|
|
|
|
computationBuffer[col] = (short)(tmp0 + tmp7); |
|
|
|
computationBuffer[col + 56] = (short)(tmp0 - tmp7); |
|
|
|
computationBuffer[col + 8] = (short)(tmp1 + tmp6); |
|
|
|
computationBuffer[col + 48] = (short)(tmp1 - tmp6); |
|
|
|
computationBuffer[col + 16] = (short)(tmp2 + tmp5); |
|
|
|
computationBuffer[col + 40] = (short)(tmp2 - tmp5); |
|
|
|
computationBuffer[col + 24] = (short)(tmp3 + tmp4); |
|
|
|
computationBuffer[col + 32] = (short)(tmp3 - tmp4); |
|
|
|
} |
|
|
|
|
|
|
|
// Pass 2: process rows from work array, store into output array.
|
|
|
|
// Note that we must descale the results by a factor of 8 == 2**3,
|
|
|
|
// and also undo the pass 1 bits scaling.
|
|
|
|
for (int row = 0; row < 64; row += 8) |
|
|
|
{ |
|
|
|
p1 = computationBuffer[row + 1]; |
|
|
|
p2 = computationBuffer[row + 2]; |
|
|
|
p3 = computationBuffer[row + 3]; |
|
|
|
p4 = computationBuffer[row + 4]; |
|
|
|
p5 = computationBuffer[row + 5]; |
|
|
|
p6 = computationBuffer[row + 6]; |
|
|
|
p7 = computationBuffer[row + 7]; |
|
|
|
|
|
|
|
// Add range center and fudge factor for final descale and range-limit.
|
|
|
|
int z5 = computationBuffer[row] + (RangeCenter << (Pass1Bits + 3)) + (1 << (Pass1Bits + 2)); |
|
|
|
|
|
|
|
// Check for all-zero AC coefficients
|
|
|
|
if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0) |
|
|
|
{ |
|
|
|
p1 = computationBufferSpan[row + 1]; |
|
|
|
p2 = computationBufferSpan[row + 2]; |
|
|
|
p3 = computationBufferSpan[row + 3]; |
|
|
|
p4 = computationBufferSpan[row + 4]; |
|
|
|
p5 = computationBufferSpan[row + 5]; |
|
|
|
p6 = computationBufferSpan[row + 6]; |
|
|
|
p7 = computationBufferSpan[row + 7]; |
|
|
|
|
|
|
|
// Add range center and fudge factor for final descale and range-limit.
|
|
|
|
int z5 = computationBufferSpan[row] + (RangeCenter << (Pass1Bits + 3)) + (1 << (Pass1Bits + 2)); |
|
|
|
|
|
|
|
// Check for all-zero AC coefficients
|
|
|
|
if ((p1 | p2 | p3 | p4 | p5 | p6 | p7) == 0) |
|
|
|
{ |
|
|
|
byte dcval = Limit[LimitOffset + (RightShift(z5, Pass1Bits + 3) & RangeMask)]; |
|
|
|
|
|
|
|
blockData[row] = dcval; |
|
|
|
blockData[row + 1] = dcval; |
|
|
|
blockData[row + 2] = dcval; |
|
|
|
blockData[row + 3] = dcval; |
|
|
|
blockData[row + 4] = dcval; |
|
|
|
blockData[row + 5] = dcval; |
|
|
|
blockData[row + 6] = dcval; |
|
|
|
blockData[row + 7] = dcval; |
|
|
|
|
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
// Even part
|
|
|
|
int tmp10 = z5 + p4; |
|
|
|
int tmp11 = z5 - p4; |
|
|
|
|
|
|
|
int tmp13 = p2 + p6; |
|
|
|
int tmp12 = Multiply(p2 - p6, FIX_1_414213562) - tmp13; // 2*c4
|
|
|
|
|
|
|
|
int tmp0 = tmp10 + tmp13; |
|
|
|
int tmp3 = tmp10 - tmp13; |
|
|
|
int tmp1 = tmp11 + tmp12; |
|
|
|
int tmp2 = tmp11 - tmp12; |
|
|
|
|
|
|
|
// Odd part
|
|
|
|
int z13 = p5 + p3; |
|
|
|
int z10 = p5 - p3; |
|
|
|
int z11 = p1 + p7; |
|
|
|
int z12 = p1 - p7; |
|
|
|
|
|
|
|
int tmp7 = z11 + z13; // Phase 5
|
|
|
|
tmp11 = Multiply(z11 - z13, FIX_1_414213562); // 2*c4
|
|
|
|
|
|
|
|
z5 = Multiply(z10 + z12, FIX_1_847759065); // 2*c2
|
|
|
|
tmp10 = z5 - Multiply(z12, FIX_1_082392200); // 2*(c2-c6)
|
|
|
|
tmp12 = z5 - Multiply(z10, FIX_2_613125930); // 2*(c2+c6)
|
|
|
|
|
|
|
|
int tmp6 = tmp12 - tmp7; // Phase 2
|
|
|
|
int tmp5 = tmp11 - tmp6; |
|
|
|
int tmp4 = tmp10 - tmp5; |
|
|
|
|
|
|
|
// Final output stage: scale down by a factor of 8, offset, and range-limit
|
|
|
|
blockData[row] = Limit[LimitOffset + (RightShift(tmp0 + tmp7, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 7] = Limit[LimitOffset + (RightShift(tmp0 - tmp7, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 1] = Limit[LimitOffset + (RightShift(tmp1 + tmp6, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 6] = Limit[LimitOffset + (RightShift(tmp1 - tmp6, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 2] = Limit[LimitOffset + (RightShift(tmp2 + tmp5, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 5] = Limit[LimitOffset + (RightShift(tmp2 - tmp5, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 3] = Limit[LimitOffset + (RightShift(tmp3 + tmp4, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 4] = Limit[LimitOffset + (RightShift(tmp3 - tmp4, Pass1Bits + 3) & RangeMask)]; |
|
|
|
byte dcval = Limit[LimitOffset + (RightShift(z5, Pass1Bits + 3) & RangeMask)]; |
|
|
|
|
|
|
|
blockData[row] = dcval; |
|
|
|
blockData[row + 1] = dcval; |
|
|
|
blockData[row + 2] = dcval; |
|
|
|
blockData[row + 3] = dcval; |
|
|
|
blockData[row + 4] = dcval; |
|
|
|
blockData[row + 5] = dcval; |
|
|
|
blockData[row + 6] = dcval; |
|
|
|
blockData[row + 7] = dcval; |
|
|
|
|
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
// Even part
|
|
|
|
int tmp10 = z5 + p4; |
|
|
|
int tmp11 = z5 - p4; |
|
|
|
|
|
|
|
int tmp13 = p2 + p6; |
|
|
|
int tmp12 = Multiply(p2 - p6, FIX_1_414213562) - tmp13; // 2*c4
|
|
|
|
|
|
|
|
int tmp0 = tmp10 + tmp13; |
|
|
|
int tmp3 = tmp10 - tmp13; |
|
|
|
int tmp1 = tmp11 + tmp12; |
|
|
|
int tmp2 = tmp11 - tmp12; |
|
|
|
|
|
|
|
// Odd part
|
|
|
|
int z13 = p5 + p3; |
|
|
|
int z10 = p5 - p3; |
|
|
|
int z11 = p1 + p7; |
|
|
|
int z12 = p1 - p7; |
|
|
|
|
|
|
|
int tmp7 = z11 + z13; // Phase 5
|
|
|
|
tmp11 = Multiply(z11 - z13, FIX_1_414213562); // 2*c4
|
|
|
|
|
|
|
|
z5 = Multiply(z10 + z12, FIX_1_847759065); // 2*c2
|
|
|
|
tmp10 = z5 - Multiply(z12, FIX_1_082392200); // 2*(c2-c6)
|
|
|
|
tmp12 = z5 - Multiply(z10, FIX_2_613125930); // 2*(c2+c6)
|
|
|
|
|
|
|
|
int tmp6 = tmp12 - tmp7; // Phase 2
|
|
|
|
int tmp5 = tmp11 - tmp6; |
|
|
|
int tmp4 = tmp10 - tmp5; |
|
|
|
|
|
|
|
// Final output stage: scale down by a factor of 8, offset, and range-limit
|
|
|
|
blockData[row] = Limit[LimitOffset + (RightShift(tmp0 + tmp7, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 7] = Limit[LimitOffset + (RightShift(tmp0 - tmp7, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 1] = Limit[LimitOffset + (RightShift(tmp1 + tmp6, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 6] = Limit[LimitOffset + (RightShift(tmp1 - tmp6, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 2] = Limit[LimitOffset + (RightShift(tmp2 + tmp5, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 5] = Limit[LimitOffset + (RightShift(tmp2 - tmp5, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 3] = Limit[LimitOffset + (RightShift(tmp3 + tmp4, Pass1Bits + 3) & RangeMask)]; |
|
|
|
blockData[row + 4] = Limit[LimitOffset + (RightShift(tmp3 - tmp4, Pass1Bits + 3) & RangeMask)]; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/// <summary>
/// Rounds and descales an int value that carries <paramref name="n"/> bits of scaling.
/// A bias of half the divisor (1 &lt;&lt; (n - 1)) is added before shifting; since
/// <see cref="RightShift"/> is assumed to round towards minus infinity, the same bias
/// is correct whether <paramref name="value"/> is positive or negative.
/// </summary>
/// <param name="value">The scaled value</param>
/// <param name="n">The number of scaling bits to remove</param>
/// <returns>The descaled <see cref="int"/></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int Descale(int value, int n)
{
    // Half of 2^n, so the subsequent shift rounds to nearest instead of truncating.
    int roundingBias = 1 << (n - 1);
    int biased = value + roundingBias;

    return RightShift(biased, n);
}
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Multiply a variable by an int constant, and immediately descale.
|
|
|
|
/// </summary>
|
|
|
|
@ -514,19 +507,5 @@ |
|
|
|
{ |
|
|
|
return value >> shift; |
|
|
|
} |
|
|
|
|
|
|
|
/// <summary>
/// Rounds and descales an int value that carries <paramref name="n"/> bits of scaling.
/// A bias of half the divisor (1 &lt;&lt; (n - 1)) is added before shifting; since
/// <see cref="RightShift"/> is assumed to round towards minus infinity, the same bias
/// is correct whether <paramref name="value"/> is positive or negative.
/// </summary>
/// <param name="value">The scaled value</param>
/// <param name="n">The number of scaling bits to remove</param>
/// <returns>The descaled <see cref="int"/></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int Descale(int value, int n)
{
    // Half of 2^n, so the subsequent shift rounds to nearest instead of truncating.
    int roundingBias = 1 << (n - 1);
    int biased = value + roundingBias;

    return RightShift(biased, n);
}
|
|
|
} |
|
|
|
} |