📷 A modern, cross-platform, 2D Graphics library for .NET
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

367 lines
16 KiB

// ReSharper disable InconsistentNaming
using System;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
{
internal static partial class ReferenceImplementations
{
/// <summary>
/// TODO: produces really bad results for bigger values!
///
/// Contains the "original" golang based DCT/IDCT implementations as reference implementations.
/// 1. ===== Forward DCT =====
/// **** The original golang source claims:
/// It is based on the code in jfdctint.c from the Independent JPEG Group,
/// found at http://www.ijg.org/files/jpegsrc.v8c.tar.gz.
///
/// **** Could be found here as well:
/// https://github.com/mozilla/mozjpeg/blob/master/jfdctint.c
///
/// 2. ===== Inverse DCT =====
///
/// The golang source claims:
/// http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/Video/verifier/mpeg2decode_960109.tar.gz
/// The referenced MPEG2 code claims:
/// /**********************************************************/
/// /* inverse two dimensional DCT, Chen-Wang algorithm */
/// /* (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984) */
/// /* 32-bit integer arithmetic (8 bit coefficients) */
/// /* 11 mults, 29 adds per DCT */
/// /* sE, 18.8.91 */
/// /**********************************************************/
/// /* coefficients extended to 12 bit for IEEE1180-1990 */
/// /* compliance sE, 2.1.94 */
/// /**********************************************************/
///
/// **** The code looks pretty similar to the standard libjpeg IDCT, but without quantization:
/// https://github.com/mozilla/mozjpeg/blob/master/jidctint.c
/// </summary>
public static class StandardIntegerDCT
{
private const int fix_0_298631336 = 2446;
private const int fix_0_390180644 = 3196;
private const int fix_0_541196100 = 4433;
private const int fix_0_765366865 = 6270;
private const int fix_0_899976223 = 7373;
private const int fix_1_175875602 = 9633;
private const int fix_1_501321110 = 12299;
private const int fix_1_847759065 = 15137;
private const int fix_1_961570560 = 16069;
private const int fix_2_053119869 = 16819;
private const int fix_2_562915447 = 20995;
private const int fix_3_072711026 = 25172;
/// <summary>
/// The number of bits
/// </summary>
private const int Bits = 13;
/// <summary>
/// The number of bits to shift by on the first pass.
/// </summary>
private const int Pass1Bits = 2;
/// <summary>
/// The value to shift by
/// </summary>
private const int CenterJSample = 128;
public static Block8x8 Subtract128_TransformFDCT_Upscale8(ref Block8x8 block)
{
var temp = new int[Block8x8.Size];
block.CopyTo(temp);
Subtract128_TransformFDCT_Upscale8_Inplace(temp);
var result = default(Block8x8);
result.LoadFrom(temp);
return result;
}
// [Obsolete("Looks like this method produces really bad results for bigger values!")]
public static Block8x8 TransformIDCT(ref Block8x8 block)
{
var temp = new int[Block8x8.Size];
block.CopyTo(temp);
TransformIDCTInplace(temp);
var result = default(Block8x8);
result.LoadFrom(temp);
return result;
}
/// <summary>
/// Performs a forward DCT on an 8x8 block of coefficients, including a level shift.
/// Leave results scaled up by an overall factor of 8.
/// </summary>
/// <param name="block">The block of coefficients.</param>
public static void Subtract128_TransformFDCT_Upscale8_Inplace(Span<int> block)
{
// Pass 1: process rows.
for (int y = 0; y < 8; y++)
{
int y8 = y * 8;
int x0 = block[y8];
int x1 = block[y8 + 1];
int x2 = block[y8 + 2];
int x3 = block[y8 + 3];
int x4 = block[y8 + 4];
int x5 = block[y8 + 5];
int x6 = block[y8 + 6];
int x7 = block[y8 + 7];
int tmp0 = x0 + x7;
int tmp1 = x1 + x6;
int tmp2 = x2 + x5;
int tmp3 = x3 + x4;
int tmp10 = tmp0 + tmp3;
int tmp12 = tmp0 - tmp3;
int tmp11 = tmp1 + tmp2;
int tmp13 = tmp1 - tmp2;
tmp0 = x0 - x7;
tmp1 = x1 - x6;
tmp2 = x2 - x5;
tmp3 = x3 - x4;
block[y8] = (tmp10 + tmp11 - (8 * CenterJSample)) << Pass1Bits;
block[y8 + 4] = (tmp10 - tmp11) << Pass1Bits;
int z1 = (tmp12 + tmp13) * fix_0_541196100;
z1 += 1 << (Bits - Pass1Bits - 1);
block[y8 + 2] = (z1 + (tmp12 * fix_0_765366865)) >> (Bits - Pass1Bits);
block[y8 + 6] = (z1 - (tmp13 * fix_1_847759065)) >> (Bits - Pass1Bits);
tmp10 = tmp0 + tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp0 + tmp2;
tmp13 = tmp1 + tmp3;
z1 = (tmp12 + tmp13) * fix_1_175875602;
z1 += 1 << (Bits - Pass1Bits - 1);
tmp0 = tmp0 * fix_1_501321110;
tmp1 = tmp1 * fix_3_072711026;
tmp2 = tmp2 * fix_2_053119869;
tmp3 = tmp3 * fix_0_298631336;
tmp10 = tmp10 * -fix_0_899976223;
tmp11 = tmp11 * -fix_2_562915447;
tmp12 = tmp12 * -fix_0_390180644;
tmp13 = tmp13 * -fix_1_961570560;
tmp12 += z1;
tmp13 += z1;
block[y8 + 1] = (tmp0 + tmp10 + tmp12) >> (Bits - Pass1Bits);
block[y8 + 3] = (tmp1 + tmp11 + tmp13) >> (Bits - Pass1Bits);
block[y8 + 5] = (tmp2 + tmp11 + tmp12) >> (Bits - Pass1Bits);
block[y8 + 7] = (tmp3 + tmp10 + tmp13) >> (Bits - Pass1Bits);
}
// Pass 2: process columns.
// We remove pass1Bits scaling, but leave results scaled up by an overall factor of 8.
for (int x = 0; x < 8; x++)
{
int tmp0 = block[x] + block[56 + x];
int tmp1 = block[8 + x] + block[48 + x];
int tmp2 = block[16 + x] + block[40 + x];
int tmp3 = block[24 + x] + block[32 + x];
int tmp10 = tmp0 + tmp3 + (1 << (Pass1Bits - 1));
int tmp12 = tmp0 - tmp3;
int tmp11 = tmp1 + tmp2;
int tmp13 = tmp1 - tmp2;
tmp0 = block[x] - block[56 + x];
tmp1 = block[8 + x] - block[48 + x];
tmp2 = block[16 + x] - block[40 + x];
tmp3 = block[24 + x] - block[32 + x];
block[x] = (tmp10 + tmp11) >> Pass1Bits;
block[32 + x] = (tmp10 - tmp11) >> Pass1Bits;
int z1 = (tmp12 + tmp13) * fix_0_541196100;
z1 += 1 << (Bits + Pass1Bits - 1);
block[16 + x] = (z1 + (tmp12 * fix_0_765366865)) >> (Bits + Pass1Bits);
block[48 + x] = (z1 - (tmp13 * fix_1_847759065)) >> (Bits + Pass1Bits);
tmp10 = tmp0 + tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp0 + tmp2;
tmp13 = tmp1 + tmp3;
z1 = (tmp12 + tmp13) * fix_1_175875602;
z1 += 1 << (Bits + Pass1Bits - 1);
tmp0 = tmp0 * fix_1_501321110;
tmp1 = tmp1 * fix_3_072711026;
tmp2 = tmp2 * fix_2_053119869;
tmp3 = tmp3 * fix_0_298631336;
tmp10 = tmp10 * -fix_0_899976223;
tmp11 = tmp11 * -fix_2_562915447;
tmp12 = tmp12 * -fix_0_390180644;
tmp13 = tmp13 * -fix_1_961570560;
tmp12 += z1;
tmp13 += z1;
block[8 + x] = (tmp0 + tmp10 + tmp12) >> (Bits + Pass1Bits);
block[24 + x] = (tmp1 + tmp11 + tmp13) >> (Bits + Pass1Bits);
block[40 + x] = (tmp2 + tmp11 + tmp12) >> (Bits + Pass1Bits);
block[56 + x] = (tmp3 + tmp10 + tmp13) >> (Bits + Pass1Bits);
}
}
private const int w1 = 2841; // 2048*sqrt(2)*cos(1*pi/16)
private const int w2 = 2676; // 2048*sqrt(2)*cos(2*pi/16)
private const int w3 = 2408; // 2048*sqrt(2)*cos(3*pi/16)
private const int w5 = 1609; // 2048*sqrt(2)*cos(5*pi/16)
private const int w6 = 1108; // 2048*sqrt(2)*cos(6*pi/16)
private const int w7 = 565; // 2048*sqrt(2)*cos(7*pi/16)
private const int w1pw7 = w1 + w7;
private const int w1mw7 = w1 - w7;
private const int w2pw6 = w2 + w6;
private const int w2mw6 = w2 - w6;
private const int w3pw5 = w3 + w5;
private const int w3mw5 = w3 - w5;
private const int r2 = 181; // 256/sqrt(2)
/// <summary>
/// Performs a 2-D Inverse Discrete Cosine Transformation.
/// <para>
/// The input coefficients should already have been multiplied by the
/// appropriate quantization table. We use fixed-point computation, with the
/// number of bits for the fractional component varying over the intermediate
/// stages.
/// </para>
/// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the
/// discrete W transform and for the discrete Fourier transform", IEEE Trans. on
/// ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984.
/// </summary>
/// <param name="src">The source block of coefficients</param>
// [Obsolete("Looks like this method produces really bad results for bigger values!")]
public static void TransformIDCTInplace(Span<int> src)
{
// Horizontal 1-D IDCT.
for (int y = 0; y < 8; y++)
{
int y8 = y * 8;
// If all the AC components are zero, then the IDCT is trivial.
if (src[y8 + 1] == 0 && src[y8 + 2] == 0 && src[y8 + 3] == 0 &&
src[y8 + 4] == 0 && src[y8 + 5] == 0 && src[y8 + 6] == 0 && src[y8 + 7] == 0)
{
int dc = src[y8 + 0] << 3;
src[y8 + 0] = dc;
src[y8 + 1] = dc;
src[y8 + 2] = dc;
src[y8 + 3] = dc;
src[y8 + 4] = dc;
src[y8 + 5] = dc;
src[y8 + 6] = dc;
src[y8 + 7] = dc;
continue;
}
// Prescale.
int x0 = (src[y8 + 0] << 11) + 128;
int x1 = src[y8 + 4] << 11;
int x2 = src[y8 + 6];
int x3 = src[y8 + 2];
int x4 = src[y8 + 1];
int x5 = src[y8 + 7];
int x6 = src[y8 + 5];
int x7 = src[y8 + 3];
// Stage 1.
int x8 = w7 * (x4 + x5);
x4 = x8 + (w1mw7 * x4);
x5 = x8 - (w1pw7 * x5);
x8 = w3 * (x6 + x7);
x6 = x8 - (w3mw5 * x6);
x7 = x8 - (w3pw5 * x7);
// Stage 2.
x8 = x0 + x1;
x0 -= x1;
x1 = w6 * (x3 + x2);
x2 = x1 - (w2pw6 * x2);
x3 = x1 + (w2mw6 * x3);
x1 = x4 + x6;
x4 -= x6;
x6 = x5 + x7;
x5 -= x7;
// Stage 3.
x7 = x8 + x3;
x8 -= x3;
x3 = x0 + x2;
x0 -= x2;
x2 = ((r2 * (x4 + x5)) + 128) >> 8;
x4 = ((r2 * (x4 - x5)) + 128) >> 8;
// Stage 4.
src[y8 + 0] = (x7 + x1) >> 8;
src[y8 + 1] = (x3 + x2) >> 8;
src[y8 + 2] = (x0 + x4) >> 8;
src[y8 + 3] = (x8 + x6) >> 8;
src[y8 + 4] = (x8 - x6) >> 8;
src[y8 + 5] = (x0 - x4) >> 8;
src[y8 + 6] = (x3 - x2) >> 8;
src[y8 + 7] = (x7 - x1) >> 8;
}
// Vertical 1-D IDCT.
for (int x = 0; x < 8; x++)
{
// Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial.
// However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so
// we do not bother to check for the all-zero case.
// Prescale.
int y0 = (src[x] << 8) + 8192;
int y1 = src[32 + x] << 8;
int y2 = src[48 + x];
int y3 = src[16 + x];
int y4 = src[8 + x];
int y5 = src[56 + x];
int y6 = src[40 + x];
int y7 = src[24 + x];
// Stage 1.
int y8 = (w7 * (y4 + y5)) + 4;
y4 = (y8 + (w1mw7 * y4)) >> 3;
y5 = (y8 - (w1pw7 * y5)) >> 3;
y8 = (w3 * (y6 + y7)) + 4;
y6 = (y8 - (w3mw5 * y6)) >> 3;
y7 = (y8 - (w3pw5 * y7)) >> 3;
// Stage 2.
y8 = y0 + y1;
y0 -= y1;
y1 = (w6 * (y3 + y2)) + 4;
y2 = (y1 - (w2pw6 * y2)) >> 3;
y3 = (y1 + (w2mw6 * y3)) >> 3;
y1 = y4 + y6;
y4 -= y6;
y6 = y5 + y7;
y5 -= y7;
// Stage 3.
y7 = y8 + y3;
y8 -= y3;
y3 = y0 + y2;
y0 -= y2;
y2 = ((r2 * (y4 + y5)) + 128) >> 8;
y4 = ((r2 * (y4 - y5)) + 128) >> 8;
// Stage 4.
src[x] = (y7 + y1) >> 14;
src[8 + x] = (y3 + y2) >> 14;
src[16 + x] = (y0 + y4) >> 14;
src[24 + x] = (y8 + y6) >> 14;
src[32 + x] = (y8 - y6) >> 14;
src[40 + x] = (y0 - y4) >> 14;
src[48 + x] = (y3 - y2) >> 14;
src[56 + x] = (y7 - y1) >> 14;
}
}
}
}
}