From f1908c8620a7f239ae2ad1713a3c71275ac42550 Mon Sep 17 00:00:00 2001 From: Ynse Hoornenborg Date: Sun, 6 Oct 2024 16:28:23 +0200 Subject: [PATCH] Implement all 1 dimensional forward transforms --- .../Av1/Transform/Av1ForwardTransformer.cs | 62 +- .../Av1Transform2dFlipConfiguration.cs | 12 +- .../Forward/Av1Adst16Forward1dTransformer.cs | 176 ++++- .../Forward/Av1Adst32Forward1dTransformer.cs | 387 ++++++++- .../Forward/Av1Adst4Forward1dTransformer.cs | 64 +- .../Forward/Av1Adst8Forward1dTransformer.cs | 84 +- .../Forward/Av1Dct16Forward1dTransformer.cs | 139 +++- .../Forward/Av1Dct32Forward1dTransformer.cs | 319 +++++++- .../Forward/Av1Dct4Forward1dTransformer.cs | 10 +- .../Forward/Av1Dct64Forward1dTransformer.cs | 739 +++++++++++++++++- .../Forward/Av1Dct8Forward1dTransformer.cs | 63 +- .../Forward/Av1Forward2dTransformerBase.cs | 6 +- .../Av1Identity16Forward1dTransformer.cs | 26 +- .../Av1Identity32Forward1dTransformer.cs | 21 +- .../Av1Identity4Forward1dTransformer.cs | 12 +- .../Av1Identity64Forward1dTransformer.cs | 31 +- .../Av1Identity8Forward1dTransformer.cs | 16 +- .../Heif/Av1/Av1ForwardTransformTests.cs | 148 ++-- .../Formats/Heif/Av1/Av1ReferenceTransform.cs | 4 +- 19 files changed, 2190 insertions(+), 129 deletions(-) diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1ForwardTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1ForwardTransformer.cs index 86a668a802..3528842677 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1ForwardTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1ForwardTransformer.cs @@ -31,7 +31,7 @@ internal class Av1ForwardTransformer null ]; - private static readonly int[] TemporaryCoefficientsBuffer = new int[64 * 64]; + private static readonly int[] TemporaryCoefficientsBuffer = new int[Av1Constants.MaxTransformSize * Av1Constants.MaxTransformSize]; internal static void Transform2d(Span input, Span coefficients, uint stride, Av1TransformType transformType, Av1TransformSize 
transformSize, int bitDepth) { @@ -49,54 +49,6 @@ internal class Av1ForwardTransformer } } - internal static void Transform2dAvx2(Span input, Span coefficients, uint stride, Av1TransformType transformType, Av1TransformSize transformSize, int bitDepth) - { - switch (transformSize) - { - case Av1TransformSize.Size4x4: - // Too small for intrinsics, use the scalar codepath instead. - Transform2d(input, coefficients, stride, transformType, transformSize, bitDepth); - break; - case Av1TransformSize.Size8x8: - Transform8x8Avx2(input, coefficients, stride, transformType, bitDepth); - break; - default: - Transform2d(input, coefficients, stride, transformType, transformSize, bitDepth); - break; - } - } - - /// - /// SVT: svt_av1_fwd_txfm2d_8x8_avx2 - /// - private static void Transform8x8Avx2(Span input, Span coefficients, uint stride, Av1TransformType transformType, int bitDepth) - { - Av1Transform2dFlipConfiguration config = new(transformType, Av1TransformSize.Size8x8); - Span shift = config.Shift; - Span> inVector = stackalloc Vector256[8]; - Span> outVector = stackalloc Vector256[8]; - ref Vector256 inRef = ref inVector[0]; - ref Vector256 outRef = ref outVector[0]; - switch (transformType) - { - case Av1TransformType.DctDct: - /* Pseudo code - Av1Dct8ForwardTransformer dct8 = new(); - LoadBuffer8x8(ref input[0], ref inRef, stride, 0, 0, shift[0]); - dct8.TransformAvx2(ref inRef, ref outRef, config.CosBitColumn, 1); - Column8x8Rounding(ref outRef, -shift[1]); - Transpose8x8Avx2(ref outRef, ref inRef); - dct8.TransformAvx2(ref inRef, ref outRef, config.CosBitRow, 1); - Transpose8x8Avx2(ref outRef, ref inRef); - WriteBuffer8x8(ref inRef, ref coefficients[0]); - break; - */ - throw new NotImplementedException(); - default: - throw new NotImplementedException(); - } - } - private static IAv1Forward1dTransformer? 
GetTransformer(Av1TransformFunctionType transformerType) => Transformers[(int)transformerType]; @@ -155,7 +107,7 @@ internal class Av1ForwardTransformer uint t = (uint)(c + ((transformRowCount - 1) * (int)inputStride)); for (r = 0; r < transformRowCount; ++r) { - // flip upside down + // Flip upside down Unsafe.Add(ref tempIn, r) = Unsafe.Add(ref input, t); t -= inputStride; } @@ -188,17 +140,23 @@ internal class Av1ForwardTransformer // Rows for (r = 0; r < transformRowCount; ++r) { - transformFunctionRow.Transform(ref Unsafe.Add(ref buf, r * transformColumnCount), ref Unsafe.Add(ref output, r * transformColumnCount), cosBitRow, stageRangeRow); + transformFunctionRow.Transform( + ref Unsafe.Add(ref buf, r * transformColumnCount), + ref Unsafe.Add(ref output, r * transformColumnCount), + cosBitRow, + stageRangeRow); RoundShiftArray(ref Unsafe.Add(ref output, r * transformColumnCount), transformColumnCount, -shift[2]); if (Math.Abs(rectangleType) == 1) { // Multiply everything by Sqrt2 if the transform is rectangular and the // size difference is a factor of 2. 
+ int t = r * transformColumnCount; for (c = 0; c < transformColumnCount; ++c) { - ref int current = ref Unsafe.Add(ref output, (r * transformColumnCount) + c); + ref int current = ref Unsafe.Add(ref output, t); current = Av1Math.RoundShift((long)current * NewSqrt, NewSqrtBitCount); + t++; } } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Transform2dFlipConfiguration.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Transform2dFlipConfiguration.cs index 4ebed44c6b..7fd6d6d95d 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Transform2dFlipConfiguration.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Transform2dFlipConfiguration.cs @@ -131,15 +131,15 @@ internal class Av1Transform2dFlipConfiguration this.TransformSize = transformSize; this.TransformType = transformType; this.SetFlip(transformType); - Av1TransformType1d tx_type_1d_col = VerticalType[(int)transformType]; - Av1TransformType1d tx_type_1d_row = HorizontalType[(int)transformType]; + this.TransformTypeColumn = VerticalType[(int)transformType]; + this.TransformTypeRow = HorizontalType[(int)transformType]; int txw_idx = transformSize.GetBlockWidthLog2() - SmallestTransformSizeLog2; int txh_idx = transformSize.GetBlockHeightLog2() - SmallestTransformSizeLog2; this.shift = ShiftMap[(int)transformSize]; this.CosBitColumn = CosBitColumnMap[txw_idx][txh_idx]; this.CosBitRow = CosBitRowMap[txw_idx][txh_idx]; - this.TransformFunctionTypeColumn = TransformFunctionTypeMap[txh_idx][(int)tx_type_1d_col]; - this.TransformFunctionTypeRow = TransformFunctionTypeMap[txw_idx][(int)tx_type_1d_row]; + this.TransformFunctionTypeColumn = TransformFunctionTypeMap[txh_idx][(int)this.TransformTypeColumn]; + this.TransformFunctionTypeRow = TransformFunctionTypeMap[txw_idx][(int)this.TransformTypeRow]; this.StageNumberColumn = StageNumberList[(int)this.TransformFunctionTypeColumn]; this.StageNumberRow = StageNumberList[(int)this.TransformFunctionTypeRow]; this.StageRangeColumn = new byte[12]; @@ -151,6 
+151,10 @@ internal class Av1Transform2dFlipConfiguration
 
     public int CosBitRow { get; }
 
+    public Av1TransformType1d TransformTypeColumn { get; }
+
+    public Av1TransformType1d TransformTypeRow { get; }
+
     public Av1TransformFunctionType TransformFunctionTypeColumn { get; }
 
     public Av1TransformFunctionType TransformFunctionTypeRow { get; }
diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst16Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst16Forward1dTransformer.cs
index f52e348560..b3c343538c 100644
--- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst16Forward1dTransformer.cs
+++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst16Forward1dTransformer.cs
@@ -1,10 +1,208 @@
 // Copyright (c) Six Labors.
 // Licensed under the Six Labors Split License.
 
+using System.Runtime.CompilerServices;
+
 namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
 
+/// <summary>
+/// Forward 16-point ADST, implemented as a scalar nine-stage butterfly network.
+/// </summary>
 internal class Av1Adst16Forward1dTransformer : IAv1Forward1dTransformer
 {
+    /// <summary>
+    /// Transforms the 16 values starting at <paramref name="input"/> and stores the 16 results
+    /// starting at <paramref name="output"/>.
+    /// </summary>
+    /// <param name="input">Reference to the first of 16 input values.</param>
+    /// <param name="output">Reference to the first of 16 output slots; must not be the same location as <paramref name="input"/>.</param>
+    /// <param name="cosBit">Selects the cosine table and is forwarded to every butterfly.</param>
+    /// <param name="stageRange">Not used by this implementation.</param>
     public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
-        => throw new NotImplementedException();
+        => TransformScalar(ref input, ref output, cosBit);
+
+    /// <summary>
+    /// Reads 16 values from <paramref name="input"/>, runs them through nine stages and writes
+    /// the 16 results to <paramref name="output"/>.
+    /// </summary>
+    /// <remarks>
+    /// Every input value is copied to a stack buffer in stage 1 and <paramref name="output"/> is
+    /// only written in stage 9; in between, values alternate between two 16-element stack buffers.
+    /// </remarks>
+    private static void TransformScalar(ref int input, ref int output, int cosBit)
+    {
+        // Compare the references, not the values behind them: with two ref int parameters,
+        // "output == input" compares the first elements and wrongly rejects calls whose first
+        // input value happens to equal the current first output value (e.g. both zero).
+        Guard.IsFalse(Unsafe.AreSame(ref input, ref output), nameof(output), "Cannot operate on same buffer for input and output.");
+
+        Span<int> temp0 = stackalloc int[16];
+        Span<int> temp1 = stackalloc int[16];
+
+        // stage 0;
+
+        // stage 1: copy the input in permuted order, negating selected entries.
+        temp1[0] = input;
+        temp1[1] = -Unsafe.Add(ref input, 15);
+        temp1[2] = -Unsafe.Add(ref input, 7);
+        temp1[3] = Unsafe.Add(ref input, 8);
+        temp1[4] = -Unsafe.Add(ref input, 3);
+        temp1[5] = Unsafe.Add(ref input, 12);
+        temp1[6] = Unsafe.Add(ref input, 4);
+        temp1[7] = -Unsafe.Add(ref input, 11);
+        temp1[8] = -Unsafe.Add(ref input, 1);
+        temp1[9] = Unsafe.Add(ref input, 14);
+        temp1[10] = Unsafe.Add(ref input, 6);
+        temp1[11] = -Unsafe.Add(ref input, 9);
+        temp1[12] = Unsafe.Add(ref input, 2);
+        temp1[13] = -Unsafe.Add(ref input, 13);
+        temp1[14] = -Unsafe.Add(ref input, 5);
+        temp1[15] = Unsafe.Add(ref input, 10);
+
+        // stage 2
+        // NOTE(review): HalfButterfly lives in Av1Dct4Forward1dTransformer; presumably a rounded weighted sum of its two inputs - confirm there.
+        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
+        temp0[0] = temp1[0];
+        temp0[1] = temp1[1];
+        temp0[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[2], cospi[32], temp1[3], cosBit);
+        temp0[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[2], -cospi[32], temp1[3], cosBit);
+        temp0[4] = temp1[4];
+        temp0[5] = temp1[5];
+        temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[6], cospi[32], temp1[7], cosBit);
+        temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[6], -cospi[32], temp1[7], cosBit);
+        temp0[8] = temp1[8];
+        temp0[9] = temp1[9];
+        temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[10], cospi[32], temp1[11], cosBit);
+        temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[10], -cospi[32], temp1[11], cosBit);
+        temp0[12] = temp1[12];
+        temp0[13] = temp1[13];
+        temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[14], cospi[32], temp1[15], cosBit);
+        temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[14], -cospi[32], temp1[15], cosBit);
+
+        // stage 3
+        temp1[0] = temp0[0] + temp0[2];
+        temp1[1] = temp0[1] + temp0[3];
+        temp1[2] = temp0[0] - temp0[2];
+        temp1[3] = temp0[1] - temp0[3];
+        temp1[4] = temp0[4] + temp0[6];
+        temp1[5] = temp0[5] + temp0[7];
+        temp1[6] = temp0[4] - temp0[6];
+        temp1[7] = temp0[5] - temp0[7];
+        temp1[8] = temp0[8] + temp0[10];
+        temp1[9] = temp0[9] + temp0[11];
+        temp1[10] = temp0[8] - temp0[10];
+        temp1[11] = temp0[9] - temp0[11];
+        temp1[12] = temp0[12] + temp0[14];
+        temp1[13] = temp0[13] + temp0[15];
+        temp1[14] = temp0[12] - temp0[14];
+        temp1[15] = temp0[13] - temp0[15];
+
+        // stage 4
+        temp0[0] = temp1[0];
+        temp0[1] = temp1[1];
+        temp0[2] = temp1[2];
+        temp0[3] = temp1[3];
+        temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[4], cospi[48], temp1[5], cosBit);
+        temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[4], -cospi[16], temp1[5], cosBit);
+        temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[6], cospi[16], temp1[7], cosBit);
+        temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[6], cospi[48], temp1[7], cosBit);
+        temp0[8] = temp1[8];
+        temp0[9] = temp1[9];
+        temp0[10] = temp1[10];
+        temp0[11] = temp1[11];
+        temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[12], cospi[48], temp1[13], cosBit);
+        temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[12], -cospi[16], temp1[13], cosBit);
+        temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[14], cospi[16], temp1[15], cosBit);
+        temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[14], cospi[48], temp1[15], cosBit);
+
+        // stage 5
+        temp1[0] = temp0[0] + temp0[4];
+        temp1[1] = temp0[1] + temp0[5];
+        temp1[2] = temp0[2] + temp0[6];
+        temp1[3] = temp0[3] + temp0[7];
+        temp1[4] = temp0[0] - temp0[4];
+        temp1[5] = temp0[1] - temp0[5];
+        temp1[6] = temp0[2] - temp0[6];
+        temp1[7] = temp0[3] - temp0[7];
+        temp1[8] = temp0[8] + temp0[12];
+        temp1[9] = temp0[9] + temp0[13];
+        temp1[10] = temp0[10] + temp0[14];
+        temp1[11] = temp0[11] + temp0[15];
+        temp1[12] = temp0[8] - temp0[12];
+        temp1[13] = temp0[9] - temp0[13];
+        temp1[14] = temp0[10] - temp0[14];
+        temp1[15] = temp0[11] - temp0[15];
+
+        // stage 6
+        temp0[0] = temp1[0];
+        temp0[1] = temp1[1];
+        temp0[2] = temp1[2];
+        temp0[3] = temp1[3];
+        temp0[4] = temp1[4];
+        temp0[5] = temp1[5];
+        temp0[6] = temp1[6];
+        temp0[7] = temp1[7];
+        temp0[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp1[8], cospi[56], temp1[9], cosBit);
+        temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[8], -cospi[8], temp1[9], cosBit);
+        temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp1[10], cospi[24], temp1[11], cosBit);
+        temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[10], -cospi[40], temp1[11], cosBit);
+        temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp1[12], cospi[8], temp1[13], cosBit);
+        temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp1[12], cospi[56], temp1[13], cosBit);
+        temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp1[14], cospi[40], temp1[15], cosBit);
+        temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp1[14], cospi[24], temp1[15], cosBit);
+
+        // stage 7
+        temp1[0] = temp0[0] + temp0[8];
+        temp1[1] = temp0[1] + temp0[9];
+        temp1[2] = temp0[2] + temp0[10];
+        temp1[3] = temp0[3] + temp0[11];
+        temp1[4] = temp0[4] + temp0[12];
+        temp1[5] = temp0[5] + temp0[13];
+        temp1[6] = temp0[6] + temp0[14];
+        temp1[7] = temp0[7] + temp0[15];
+        temp1[8] = temp0[0] - temp0[8];
+        temp1[9] = temp0[1] - temp0[9];
+        temp1[10] = temp0[2] - temp0[10];
+        temp1[11] = temp0[3] - temp0[11];
+        temp1[12] = temp0[4] - temp0[12];
+        temp1[13] = temp0[5] - temp0[13];
+        temp1[14] = temp0[6] - temp0[14];
+        temp1[15] = temp0[7] - temp0[15];
+
+        // stage 8
+        temp0[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[2], temp1[0], cospi[62], temp1[1], cosBit);
+        temp0[1] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[62], temp1[0], -cospi[2], temp1[1], cosBit);
+        temp0[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[10], temp1[2], cospi[54], temp1[3], cosBit);
+        temp0[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[54], temp1[2], -cospi[10], temp1[3], cosBit);
+        temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[18], temp1[4], cospi[46], temp1[5], cosBit);
+        temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[46], temp1[4], -cospi[18], temp1[5], cosBit);
+        temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[26], temp1[6], cospi[38], temp1[7], cosBit);
+        temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[38], temp1[6], -cospi[26], temp1[7], cosBit);
+        temp0[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[34], temp1[8], cospi[30], temp1[9], cosBit);
+        temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[30], temp1[8], -cospi[34], temp1[9], cosBit);
+        temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[42], temp1[10], cospi[22], temp1[11], cosBit);
+        temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[22], temp1[10], -cospi[42], temp1[11], cosBit);
+        temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[50], temp1[12], cospi[14], temp1[13], cosBit);
+        temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[14], temp1[12], -cospi[50], temp1[13], cosBit);
+        temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[58], temp1[14], cospi[6], temp1[15], cosBit);
+        temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[6], temp1[14], -cospi[58], temp1[15], cosBit);
+
+        // stage 9: write the results to the output in permuted order.
+        output = temp0[1];
+        Unsafe.Add(ref output, 1) = temp0[14];
+        Unsafe.Add(ref output, 2) = temp0[3];
+        Unsafe.Add(ref output, 3) = temp0[12];
+        Unsafe.Add(ref output, 4) = temp0[5];
+        Unsafe.Add(ref output, 5) = temp0[10];
+        Unsafe.Add(ref output, 6) = temp0[7];
+        Unsafe.Add(ref output, 7) = temp0[8];
+        Unsafe.Add(ref output, 8) = temp0[9];
+        Unsafe.Add(ref output, 9) = temp0[6];
+        Unsafe.Add(ref output, 10) = temp0[11];
+        Unsafe.Add(ref output, 11) = temp0[4];
+        Unsafe.Add(ref output, 12) = temp0[13];
+        Unsafe.Add(ref output, 13) = temp0[2];
+        Unsafe.Add(ref output, 14) = temp0[15];
+        Unsafe.Add(ref output, 15) = temp0[0];
+    }
 }
diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst32Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst32Forward1dTransformer.cs
index 5a5b7765f5..9f707ae639 100644
--- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst32Forward1dTransformer.cs
+++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst32Forward1dTransformer.cs
@@ -1,10 +1,395 @@
 // Copyright (c) Six Labors.
 // Licensed under the Six Labors Split License.
 
+using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Adst32Forward1dTransformer : IAv1Forward1dTransformer { public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + => TransformScalar(ref input, ref output, cosBit); + + private static void TransformScalar(ref int input, ref int outputRef, int cosBit) + { + Span temp0 = stackalloc int[32]; + Span temp1 = stackalloc int[32]; + + // stage 0; + + // stage 1; + temp1[0] = Unsafe.Add(ref input, 31); + temp1[1] = input; + temp1[2] = Unsafe.Add(ref input, 29); + temp1[3] = Unsafe.Add(ref input, 2); + temp1[4] = Unsafe.Add(ref input, 27); + temp1[5] = Unsafe.Add(ref input, 4); + temp1[6] = Unsafe.Add(ref input, 25); + temp1[7] = Unsafe.Add(ref input, 6); + temp1[8] = Unsafe.Add(ref input, 23); + temp1[9] = Unsafe.Add(ref input, 8); + temp1[10] = Unsafe.Add(ref input, 21); + temp1[11] = Unsafe.Add(ref input, 10); + temp1[12] = Unsafe.Add(ref input, 19); + temp1[13] = Unsafe.Add(ref input, 12); + temp1[14] = Unsafe.Add(ref input, 17); + temp1[15] = Unsafe.Add(ref input, 14); + temp1[16] = Unsafe.Add(ref input, 15); + temp1[17] = Unsafe.Add(ref input, 16); + temp1[18] = Unsafe.Add(ref input, 13); + temp1[19] = Unsafe.Add(ref input, 18); + temp1[20] = Unsafe.Add(ref input, 11); + temp1[21] = Unsafe.Add(ref input, 20); + temp1[22] = Unsafe.Add(ref input, 9); + temp1[23] = Unsafe.Add(ref input, 22); + temp1[24] = Unsafe.Add(ref input, 7); + temp1[25] = Unsafe.Add(ref input, 24); + temp1[26] = Unsafe.Add(ref input, 5); + temp1[27] = Unsafe.Add(ref input, 26); + temp1[28] = Unsafe.Add(ref input, 3); + temp1[29] = Unsafe.Add(ref input, 28); + temp1[30] = Unsafe.Add(ref input, 1); + temp1[31] = Unsafe.Add(ref input, 30); + + // stage 2 + Span cospi = Av1SinusConstants.CosinusPi(cosBit); + temp0[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[1], temp1[0], cospi[63], temp1[1], cosBit); + 
temp0[1] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[1], temp1[1], cospi[63], temp1[0], cosBit); + temp0[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[5], temp1[2], cospi[59], temp1[3], cosBit); + temp0[3] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[5], temp1[3], cospi[59], temp1[2], cosBit); + temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[9], temp1[4], cospi[55], temp1[5], cosBit); + temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[9], temp1[5], cospi[55], temp1[4], cosBit); + temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[13], temp1[6], cospi[51], temp1[7], cosBit); + temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[13], temp1[7], cospi[51], temp1[6], cosBit); + temp0[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[17], temp1[8], cospi[47], temp1[9], cosBit); + temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[17], temp1[9], cospi[47], temp1[8], cosBit); + temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[21], temp1[10], cospi[43], temp1[11], cosBit); + temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[21], temp1[11], cospi[43], temp1[10], cosBit); + temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[25], temp1[12], cospi[39], temp1[13], cosBit); + temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[25], temp1[13], cospi[39], temp1[12], cosBit); + temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[29], temp1[14], cospi[35], temp1[15], cosBit); + temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[29], temp1[15], cospi[35], temp1[14], cosBit); + temp0[16] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[33], temp1[16], cospi[31], temp1[17], cosBit); + temp0[17] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[33], temp1[17], cospi[31], temp1[16], cosBit); + temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[37], temp1[18], cospi[27], temp1[19], cosBit); + temp0[19] = 
Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[37], temp1[19], cospi[27], temp1[18], cosBit); + temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[41], temp1[20], cospi[23], temp1[21], cosBit); + temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[41], temp1[21], cospi[23], temp1[20], cosBit); + temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[45], temp1[22], cospi[19], temp1[23], cosBit); + temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[45], temp1[23], cospi[19], temp1[22], cosBit); + temp0[24] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[49], temp1[24], cospi[15], temp1[25], cosBit); + temp0[25] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[49], temp1[25], cospi[15], temp1[24], cosBit); + temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[53], temp1[26], cospi[11], temp1[27], cosBit); + temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[53], temp1[27], cospi[11], temp1[26], cosBit); + temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[57], temp1[28], cospi[7], temp1[29], cosBit); + temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[57], temp1[29], cospi[7], temp1[28], cosBit); + temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[61], temp1[30], cospi[3], temp1[31], cosBit); + temp0[31] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[61], temp1[31], cospi[3], temp1[30], cosBit); + + // stage 3 + temp1[0] = temp0[0] + temp0[16]; + temp1[1] = temp0[1] + temp0[17]; + temp1[2] = temp0[2] + temp0[18]; + temp1[3] = temp0[3] + temp0[19]; + temp1[4] = temp0[4] + temp0[20]; + temp1[5] = temp0[5] + temp0[21]; + temp1[6] = temp0[6] + temp0[22]; + temp1[7] = temp0[7] + temp0[23]; + temp1[8] = temp0[8] + temp0[24]; + temp1[9] = temp0[9] + temp0[25]; + temp1[10] = temp0[10] + temp0[26]; + temp1[11] = temp0[11] + temp0[27]; + temp1[12] = temp0[12] + temp0[28]; + temp1[13] = temp0[13] + temp0[29]; + temp1[14] = temp0[14] + temp0[30]; + temp1[15] = temp0[15] + temp0[31]; + 
temp1[16] = -temp0[16] + temp0[0]; + temp1[17] = -temp0[17] + temp0[1]; + temp1[18] = -temp0[18] + temp0[2]; + temp1[19] = -temp0[19] + temp0[3]; + temp1[20] = -temp0[20] + temp0[4]; + temp1[21] = -temp0[21] + temp0[5]; + temp1[22] = -temp0[22] + temp0[6]; + temp1[23] = -temp0[23] + temp0[7]; + temp1[24] = -temp0[24] + temp0[8]; + temp1[25] = -temp0[25] + temp0[9]; + temp1[26] = -temp0[26] + temp0[10]; + temp1[27] = -temp0[27] + temp0[11]; + temp1[28] = -temp0[28] + temp0[12]; + temp1[29] = -temp0[29] + temp0[13]; + temp1[30] = -temp0[30] + temp0[14]; + temp1[31] = -temp0[31] + temp0[15]; + + // stage 4 + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + temp0[2] = temp1[2]; + temp0[3] = temp1[3]; + temp0[4] = temp1[4]; + temp0[5] = temp1[5]; + temp0[6] = temp1[6]; + temp0[7] = temp1[7]; + temp0[8] = temp1[8]; + temp0[9] = temp1[9]; + temp0[10] = temp1[10]; + temp0[11] = temp1[11]; + temp0[12] = temp1[12]; + temp0[13] = temp1[13]; + temp0[14] = temp1[14]; + temp0[15] = temp1[15]; + temp0[16] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[4], temp1[16], cospi[60], temp1[17], cosBit); + temp0[17] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[4], temp1[17], cospi[60], temp1[16], cosBit); + temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[20], temp1[18], cospi[44], temp1[19], cosBit); + temp0[19] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[20], temp1[19], cospi[44], temp1[18], cosBit); + temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[36], temp1[20], cospi[28], temp1[21], cosBit); + temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[36], temp1[21], cospi[28], temp1[20], cosBit); + temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[52], temp1[22], cospi[12], temp1[23], cosBit); + temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[52], temp1[23], cospi[12], temp1[22], cosBit); + temp0[24] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[60], temp1[24], cospi[4], temp1[25], cosBit); + temp0[25] = 
Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp1[25], cospi[4], temp1[24], cosBit); + temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[44], temp1[26], cospi[20], temp1[27], cosBit); + temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp1[27], cospi[20], temp1[26], cosBit); + temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[28], temp1[28], cospi[36], temp1[29], cosBit); + temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp1[29], cospi[36], temp1[28], cosBit); + temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[12], temp1[30], cospi[52], temp1[31], cosBit); + temp0[31] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp1[31], cospi[52], temp1[30], cosBit); + + // stage 5 + temp1[0] = temp0[0] + temp0[8]; + temp1[1] = temp0[1] + temp0[9]; + temp1[2] = temp0[2] + temp0[10]; + temp1[3] = temp0[3] + temp0[11]; + temp1[4] = temp0[4] + temp0[12]; + temp1[5] = temp0[5] + temp0[13]; + temp1[6] = temp0[6] + temp0[14]; + temp1[7] = temp0[7] + temp0[15]; + temp1[8] = -temp0[8] + temp0[0]; + temp1[9] = -temp0[9] + temp0[1]; + temp1[10] = -temp0[10] + temp0[2]; + temp1[11] = -temp0[11] + temp0[3]; + temp1[12] = -temp0[12] + temp0[4]; + temp1[13] = -temp0[13] + temp0[5]; + temp1[14] = -temp0[14] + temp0[6]; + temp1[15] = -temp0[15] + temp0[7]; + temp1[16] = temp0[16] + temp0[24]; + temp1[17] = temp0[17] + temp0[25]; + temp1[18] = temp0[18] + temp0[26]; + temp1[19] = temp0[19] + temp0[27]; + temp1[20] = temp0[20] + temp0[28]; + temp1[21] = temp0[21] + temp0[29]; + temp1[22] = temp0[22] + temp0[30]; + temp1[23] = temp0[23] + temp0[31]; + temp1[24] = -temp0[24] + temp0[16]; + temp1[25] = -temp0[25] + temp0[17]; + temp1[26] = -temp0[26] + temp0[18]; + temp1[27] = -temp0[27] + temp0[19]; + temp1[28] = -temp0[28] + temp0[20]; + temp1[29] = -temp0[29] + temp0[21]; + temp1[30] = -temp0[30] + temp0[22]; + temp1[31] = -temp0[31] + temp0[23]; + + // stage 6 + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + 
temp0[2] = temp1[2]; + temp0[3] = temp1[3]; + temp0[4] = temp1[4]; + temp0[5] = temp1[5]; + temp0[6] = temp1[6]; + temp0[7] = temp1[7]; + temp0[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp1[8], cospi[56], temp1[9], cosBit); + temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp1[9], cospi[56], temp1[8], cosBit); + temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp1[10], cospi[24], temp1[11], cosBit); + temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp1[11], cospi[24], temp1[10], cosBit); + temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp1[12], cospi[8], temp1[13], cosBit); + temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[13], cospi[8], temp1[12], cosBit); + temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp1[14], cospi[40], temp1[15], cosBit); + temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[15], cospi[40], temp1[14], cosBit); + temp0[16] = temp1[16]; + temp0[17] = temp1[17]; + temp0[18] = temp1[18]; + temp0[19] = temp1[19]; + temp0[20] = temp1[20]; + temp0[21] = temp1[21]; + temp0[22] = temp1[22]; + temp0[23] = temp1[23]; + temp0[24] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp1[24], cospi[56], temp1[25], cosBit); + temp0[25] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp1[25], cospi[56], temp1[24], cosBit); + temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp1[26], cospi[24], temp1[27], cosBit); + temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp1[27], cospi[24], temp1[26], cosBit); + temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp1[28], cospi[8], temp1[29], cosBit); + temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[29], cospi[8], temp1[28], cosBit); + temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp1[30], cospi[40], temp1[31], cosBit); + temp0[31] = 
Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[31], cospi[40], temp1[30], cosBit); + + // stage 7 + temp1[0] = temp0[0] + temp0[4]; + temp1[1] = temp0[1] + temp0[5]; + temp1[2] = temp0[2] + temp0[6]; + temp1[3] = temp0[3] + temp0[7]; + temp1[4] = -temp0[4] + temp0[0]; + temp1[5] = -temp0[5] + temp0[1]; + temp1[6] = -temp0[6] + temp0[2]; + temp1[7] = -temp0[7] + temp0[3]; + temp1[8] = temp0[8] + temp0[12]; + temp1[9] = temp0[9] + temp0[13]; + temp1[10] = temp0[10] + temp0[14]; + temp1[11] = temp0[11] + temp0[15]; + temp1[12] = -temp0[12] + temp0[8]; + temp1[13] = -temp0[13] + temp0[9]; + temp1[14] = -temp0[14] + temp0[10]; + temp1[15] = -temp0[15] + temp0[11]; + temp1[16] = temp0[16] + temp0[20]; + temp1[17] = temp0[17] + temp0[21]; + temp1[18] = temp0[18] + temp0[22]; + temp1[19] = temp0[19] + temp0[23]; + temp1[20] = -temp0[20] + temp0[16]; + temp1[21] = -temp0[21] + temp0[17]; + temp1[22] = -temp0[22] + temp0[18]; + temp1[23] = -temp0[23] + temp0[19]; + temp1[24] = temp0[24] + temp0[28]; + temp1[25] = temp0[25] + temp0[29]; + temp1[26] = temp0[26] + temp0[30]; + temp1[27] = temp0[27] + temp0[31]; + temp1[28] = -temp0[28] + temp0[24]; + temp1[29] = -temp0[29] + temp0[25]; + temp1[30] = -temp0[30] + temp0[26]; + temp1[31] = -temp0[31] + temp0[27]; + + // stage 8 + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + temp0[2] = temp1[2]; + temp0[3] = temp1[3]; + temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[4], cospi[48], temp1[5], cosBit); + temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[5], cospi[48], temp1[4], cosBit); + temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[6], cospi[16], temp1[7], cosBit); + temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[7], cospi[16], temp1[6], cosBit); + temp0[8] = temp1[8]; + temp0[9] = temp1[9]; + temp0[10] = temp1[10]; + temp0[11] = temp1[11]; + temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[12], cospi[48], 
temp1[13], cosBit); + temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[13], cospi[48], temp1[12], cosBit); + temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[14], cospi[16], temp1[15], cosBit); + temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[15], cospi[16], temp1[14], cosBit); + temp0[16] = temp1[16]; + temp0[17] = temp1[17]; + temp0[18] = temp1[18]; + temp0[19] = temp1[19]; + temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[20], cospi[48], temp1[21], cosBit); + temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[21], cospi[48], temp1[20], cosBit); + temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[22], cospi[16], temp1[23], cosBit); + temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[23], cospi[16], temp1[22], cosBit); + temp0[24] = temp1[24]; + temp0[25] = temp1[25]; + temp0[26] = temp1[26]; + temp0[27] = temp1[27]; + temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[28], cospi[48], temp1[29], cosBit); + temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[29], cospi[48], temp1[28], cosBit); + temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[30], cospi[16], temp1[31], cosBit); + temp0[31] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[31], cospi[16], temp1[30], cosBit); + + // stage 9 + temp1[0] = temp0[0] + temp0[2]; + temp1[1] = temp0[1] + temp0[3]; + temp1[2] = -temp0[2] + temp0[0]; + temp1[3] = -temp0[3] + temp0[1]; + temp1[4] = temp0[4] + temp0[6]; + temp1[5] = temp0[5] + temp0[7]; + temp1[6] = -temp0[6] + temp0[4]; + temp1[7] = -temp0[7] + temp0[5]; + temp1[8] = temp0[8] + temp0[10]; + temp1[9] = temp0[9] + temp0[11]; + temp1[10] = -temp0[10] + temp0[8]; + temp1[11] = -temp0[11] + temp0[9]; + temp1[12] = temp0[12] + temp0[14]; + temp1[13] = temp0[13] + temp0[15]; + temp1[14] = -temp0[14] + temp0[12]; + temp1[15] = -temp0[15] + 
temp0[13]; + temp1[16] = temp0[16] + temp0[18]; + temp1[17] = temp0[17] + temp0[19]; + temp1[18] = -temp0[18] + temp0[16]; + temp1[19] = -temp0[19] + temp0[17]; + temp1[20] = temp0[20] + temp0[22]; + temp1[21] = temp0[21] + temp0[23]; + temp1[22] = -temp0[22] + temp0[20]; + temp1[23] = -temp0[23] + temp0[21]; + temp1[24] = temp0[24] + temp0[26]; + temp1[25] = temp0[25] + temp0[27]; + temp1[26] = -temp0[26] + temp0[24]; + temp1[27] = -temp0[27] + temp0[25]; + temp1[28] = temp0[28] + temp0[30]; + temp1[29] = temp0[29] + temp0[31]; + temp1[30] = -temp0[30] + temp0[28]; + temp1[31] = -temp0[31] + temp0[29]; + + // stage 10 + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + temp0[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[2], cospi[32], temp1[3], cosBit); + temp0[3] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[3], cospi[32], temp1[2], cosBit); + temp0[4] = temp1[4]; + temp0[5] = temp1[5]; + temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[6], cospi[32], temp1[7], cosBit); + temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[7], cospi[32], temp1[6], cosBit); + temp0[8] = temp1[8]; + temp0[9] = temp1[9]; + temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[10], cospi[32], temp1[11], cosBit); + temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[11], cospi[32], temp1[10], cosBit); + temp0[12] = temp1[12]; + temp0[13] = temp1[13]; + temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[14], cospi[32], temp1[15], cosBit); + temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[15], cospi[32], temp1[14], cosBit); + temp0[16] = temp1[16]; + temp0[17] = temp1[17]; + temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[18], cospi[32], temp1[19], cosBit); + temp0[19] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[19], cospi[32], temp1[18], cosBit); + temp0[20] = temp1[20]; + temp0[21] = temp1[21]; + 
temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[22], cospi[32], temp1[23], cosBit); + temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[23], cospi[32], temp1[22], cosBit); + temp0[24] = temp1[24]; + temp0[25] = temp1[25]; + temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[26], cospi[32], temp1[27], cosBit); + temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[27], cospi[32], temp1[26], cosBit); + temp0[28] = temp1[28]; + temp0[29] = temp1[29]; + temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[30], cospi[32], temp1[31], cosBit); + temp0[31] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[31], cospi[32], temp1[30], cosBit); + + // stage 11 + outputRef = temp0[0]; + Unsafe.Add(ref outputRef, 1) = -temp0[16]; + Unsafe.Add(ref outputRef, 2) = temp0[24]; + Unsafe.Add(ref outputRef, 3) = -temp0[8]; + Unsafe.Add(ref outputRef, 4) = temp0[12]; + Unsafe.Add(ref outputRef, 5) = -temp0[28]; + Unsafe.Add(ref outputRef, 6) = temp0[20]; + Unsafe.Add(ref outputRef, 7) = -temp0[4]; + Unsafe.Add(ref outputRef, 8) = temp0[6]; + Unsafe.Add(ref outputRef, 9) = -temp0[22]; + Unsafe.Add(ref outputRef, 10) = temp0[30]; + Unsafe.Add(ref outputRef, 11) = -temp0[14]; + Unsafe.Add(ref outputRef, 12) = temp0[10]; + Unsafe.Add(ref outputRef, 13) = -temp0[26]; + Unsafe.Add(ref outputRef, 14) = temp0[18]; + Unsafe.Add(ref outputRef, 15) = -temp0[2]; + Unsafe.Add(ref outputRef, 16) = temp0[3]; + Unsafe.Add(ref outputRef, 17) = -temp0[19]; + Unsafe.Add(ref outputRef, 18) = temp0[27]; + Unsafe.Add(ref outputRef, 19) = -temp0[11]; + Unsafe.Add(ref outputRef, 20) = temp0[15]; + Unsafe.Add(ref outputRef, 21) = -temp0[31]; + Unsafe.Add(ref outputRef, 22) = temp0[23]; + Unsafe.Add(ref outputRef, 23) = -temp0[7]; + Unsafe.Add(ref outputRef, 24) = temp0[5]; + Unsafe.Add(ref outputRef, 25) = -temp0[21]; + Unsafe.Add(ref outputRef, 26) = temp0[29]; + Unsafe.Add(ref outputRef, 27) = 
-temp0[13]; + Unsafe.Add(ref outputRef, 28) = temp0[9]; + Unsafe.Add(ref outputRef, 29) = -temp0[25]; + Unsafe.Add(ref outputRef, 30) = temp0[17]; + Unsafe.Add(ref outputRef, 31) = -temp0[1]; + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst4Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst4Forward1dTransformer.cs index 8dcea3770b..2b4952873a 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst4Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst4Forward1dTransformer.cs @@ -1,10 +1,72 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Adst4Forward1dTransformer : IAv1Forward1dTransformer { public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + => TransformScalar(ref input, ref output, cosBit); + + private static void TransformScalar(ref int input, ref int output, int cosBit) + { + Span sinpi = Av1SinusConstants.SinusPi(cosBit); + int x0, x1, x2, x3; + int s0, s1, s2, s3, s4, s5, s6, s7; + + // stage 0: load the four inputs; an all-zero input yields an all-zero output without further work. + x0 = input; + x1 = Unsafe.Add(ref input, 1); + x2 = Unsafe.Add(ref input, 2); + x3 = Unsafe.Add(ref input, 3); + + if ((x0 | x1 | x2 | x3) == 0) + { + output = 0; + Unsafe.Add(ref output, 1) = 0; + Unsafe.Add(ref output, 2) = 0; + Unsafe.Add(ref output, 3) = 0; + return; + } + + // stage 1: products of the inputs with the sinpi constants; s7 starts as the sum of the first two inputs. + s0 = sinpi[1] * x0; + s1 = sinpi[4] * x0; + s2 = sinpi[2] * x1; + s3 = sinpi[1] * x1; + s4 = sinpi[3] * x2; + s5 = sinpi[4] * x3; + s6 = sinpi[2] * x3; + s7 = x0 + x1; + + // stage 2 + s7 -= x3; + + // stage 3 + x0 = s0 + s2; + x1 = sinpi[3] * s7; + x2 = s1 - s3; + x3 = s4; + + // stage 4 + x0 += s5; + x2 += s6; + + // stage 5 + s0 = x0 + x3; + s1 = x1; + s2 = x2 - x3; + s3 = x2 - x0; + + // stage 6 + s3 += x3; + + // 1-D
transform scaling factor is sqrt(2). + output = Av1Math.RoundShift(s0, cosBit); + Unsafe.Add(ref output, 1) = Av1Math.RoundShift(s1, cosBit); + Unsafe.Add(ref output, 2) = Av1Math.RoundShift(s2, cosBit); + Unsafe.Add(ref output, 3) = Av1Math.RoundShift(s3, cosBit); + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst8Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst8Forward1dTransformer.cs index 3da6fc593b..8019df88c5 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst8Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst8Forward1dTransformer.cs @@ -1,10 +1,92 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Adst8Forward1dTransformer : IAv1Forward1dTransformer { public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + => TransformScalar(ref input, ref output, cosBit); + + private static void TransformScalar(ref int input, ref int output, int cosBit) + { + Span temp0 = stackalloc int[8]; + Span temp1 = stackalloc int[8]; + + // Scalar path of the 8-point forward ADST: reads 8 ints starting at input, writes 8 ints starting at output. stage 0: nothing to compute. + + // stage 1: reject aliased buffers by comparing the references (comparing the dereferenced values would trip whenever the first elements happen to be equal), then load the inputs in permuted order with four of them negated. + Guard.IsFalse(Unsafe.AreSame(ref input, ref output), nameof(output), "Cannot operate on same buffer for input and output."); + temp0[0] = input; + temp0[1] = -Unsafe.Add(ref input, 7); + temp0[2] = -Unsafe.Add(ref input, 3); + temp0[3] = Unsafe.Add(ref input, 4); + temp0[4] = -Unsafe.Add(ref input, 1); + temp0[5] = Unsafe.Add(ref input, 6); + temp0[6] = Unsafe.Add(ref input, 2); + temp0[7] = -Unsafe.Add(ref input, 5); + + // stage 2 + Span cospi = Av1SinusConstants.CosinusPi(cosBit); + temp1[0] = temp0[0]; + temp1[1] = temp0[1]; + temp1[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[2], cospi[32], temp0[3], cosBit); + temp1[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32],
temp0[2], -cospi[32], temp0[3], cosBit); + temp1[4] = temp0[4]; + temp1[5] = temp0[5]; + temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[6], cospi[32], temp0[7], cosBit); + temp1[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[6], -cospi[32], temp0[7], cosBit); + + // stage 3 + temp0[0] = temp1[0] + temp1[2]; + temp0[1] = temp1[1] + temp1[3]; + temp0[2] = temp1[0] - temp1[2]; + temp0[3] = temp1[1] - temp1[3]; + temp0[4] = temp1[4] + temp1[6]; + temp0[5] = temp1[5] + temp1[7]; + temp0[6] = temp1[4] - temp1[6]; + temp0[7] = temp1[5] - temp1[7]; + + // stage 4 + temp1[0] = temp0[0]; + temp1[1] = temp0[1]; + temp1[2] = temp0[2]; + temp1[3] = temp0[3]; + temp1[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[4], cospi[48], temp0[5], cosBit); + temp1[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[4], -cospi[16], temp0[5], cosBit); + temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[6], cospi[16], temp0[7], cosBit); + temp1[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[6], cospi[48], temp0[7], cosBit); + + // stage 5 + temp0[0] = temp1[0] + temp1[4]; + temp0[1] = temp1[1] + temp1[5]; + temp0[2] = temp1[2] + temp1[6]; + temp0[3] = temp1[3] + temp1[7]; + temp0[4] = temp1[0] - temp1[4]; + temp0[5] = temp1[1] - temp1[5]; + temp0[6] = temp1[2] - temp1[6]; + temp0[7] = temp1[3] - temp1[7]; + + // stage 6 + temp1[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[4], temp0[0], cospi[60], temp0[1], cosBit); + temp1[1] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[0], -cospi[4], temp0[1], cosBit); + temp1[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[20], temp0[2], cospi[44], temp0[3], cosBit); + temp1[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[2], -cospi[20], temp0[3], cosBit); + temp1[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[36], temp0[4], cospi[28], temp0[5], cosBit); + temp1[5] =
Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[4], -cospi[36], temp0[5], cosBit); + temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[52], temp0[6], cospi[12], temp0[7], cosBit); + temp1[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[6], -cospi[52], temp0[7], cosBit); + + // stage 7: copy the results to output in permuted order. + output = temp1[1]; + Unsafe.Add(ref output, 1) = temp1[6]; + Unsafe.Add(ref output, 2) = temp1[3]; + Unsafe.Add(ref output, 3) = temp1[4]; + Unsafe.Add(ref output, 4) = temp1[5]; + Unsafe.Add(ref output, 5) = temp1[2]; + Unsafe.Add(ref output, 6) = temp1[7]; + Unsafe.Add(ref output, 7) = temp1[0]; + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct16Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct16Forward1dTransformer.cs index 891635609e..1173c56de7 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct16Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct16Forward1dTransformer.cs @@ -1,10 +1,147 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License.
+using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Dct16Forward1dTransformer : IAv1Forward1dTransformer { public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + => TransformScalar(ref input, ref output, cosBit); + + private static void TransformScalar(ref int input, ref int output, int cosBit) + { + Span temp0 = stackalloc int[16]; + Span temp1 = stackalloc int[16]; + + // Scalar path of the 16-point forward DCT: reads 16 ints starting at input, writes 16 ints starting at output. stage 0: nothing to compute. + + // stage 1: temp0[0..7] are sums and temp0[8..15] are differences of the mirrored input pairs (i, 15-i). + temp0[0] = Unsafe.Add(ref input, 0) + Unsafe.Add(ref input, 15); + temp0[1] = Unsafe.Add(ref input, 1) + Unsafe.Add(ref input, 14); + temp0[2] = Unsafe.Add(ref input, 2) + Unsafe.Add(ref input, 13); + temp0[3] = Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 12); + temp0[4] = Unsafe.Add(ref input, 4) + Unsafe.Add(ref input, 11); + temp0[5] = Unsafe.Add(ref input, 5) + Unsafe.Add(ref input, 10); + temp0[6] = Unsafe.Add(ref input, 6) + Unsafe.Add(ref input, 9); + temp0[7] = Unsafe.Add(ref input, 7) + Unsafe.Add(ref input, 8); + temp0[8] = -Unsafe.Add(ref input, 8) + Unsafe.Add(ref input, 7); + temp0[9] = -Unsafe.Add(ref input, 9) + Unsafe.Add(ref input, 6); + temp0[10] = -Unsafe.Add(ref input, 10) + Unsafe.Add(ref input, 5); + temp0[11] = -Unsafe.Add(ref input, 11) + Unsafe.Add(ref input, 4); + temp0[12] = -Unsafe.Add(ref input, 12) + Unsafe.Add(ref input, 3); + temp0[13] = -Unsafe.Add(ref input, 13) + Unsafe.Add(ref input, 2); + temp0[14] = -Unsafe.Add(ref input, 14) + Unsafe.Add(ref input, 1); + temp0[15] = -Unsafe.Add(ref input, 15) + Unsafe.Add(ref input, 0); + + // stage 2 + Span cospi = Av1SinusConstants.CosinusPi(cosBit); + temp1[0] = temp0[0] + temp0[7]; + temp1[1] = temp0[1] + temp0[6]; + temp1[2] = temp0[2] + temp0[5]; + temp1[3] = temp0[3] + temp0[4]; + temp1[4] = -temp0[4] + temp0[3]; + temp1[5] = -temp0[5] + temp0[2]; + temp1[6] = -temp0[6] + temp0[1]; + temp1[7] = -temp0[7] + temp0[0]; + temp1[8] = temp0[8]; +
temp1[9] = temp0[9]; + temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[10], cospi[32], temp0[13], cosBit); + temp1[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[11], cospi[32], temp0[12], cosBit); + temp1[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[12], cospi[32], temp0[11], cosBit); + temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[13], cospi[32], temp0[10], cosBit); + temp1[14] = temp0[14]; + temp1[15] = temp0[15]; + + // stage 3 + temp0[0] = temp1[0] + temp1[3]; + temp0[1] = temp1[1] + temp1[2]; + temp0[2] = -temp1[2] + temp1[1]; + temp0[3] = -temp1[3] + temp1[0]; + temp0[4] = temp1[4]; + temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[5], cospi[32], temp1[6], cosBit); + temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[6], cospi[32], temp1[5], cosBit); + temp0[7] = temp1[7]; + temp0[8] = temp1[8] + temp1[11]; + temp0[9] = temp1[9] + temp1[10]; + temp0[10] = -temp1[10] + temp1[9]; + temp0[11] = -temp1[11] + temp1[8]; + temp0[12] = -temp1[12] + temp1[15]; + temp0[13] = -temp1[13] + temp1[14]; + temp0[14] = temp1[14] + temp1[13]; + temp0[15] = temp1[15] + temp1[12]; + + // stage 4 + temp1[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[0], cospi[32], temp0[1], cosBit); + temp1[1] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[1], cospi[32], temp0[0], cosBit); + temp1[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[2], cospi[16], temp0[3], cosBit); + temp1[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[3], -cospi[16], temp0[2], cosBit); + temp1[4] = temp0[4] + temp0[5]; + temp1[5] = -temp0[5] + temp0[4]; + temp1[6] = -temp0[6] + temp0[7]; + temp1[7] = temp0[7] + temp0[6]; + temp1[8] = temp0[8]; + temp1[9] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[9], cospi[48], temp0[14], cosBit); + temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48],
temp0[10], -cospi[16], temp0[13], cosBit); + temp1[11] = temp0[11]; + temp1[12] = temp0[12]; + temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[13], -cospi[16], temp0[10], cosBit); + temp1[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[14], cospi[48], temp0[9], cosBit); + temp1[15] = temp0[15]; + + // stage 5 + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + temp0[2] = temp1[2]; + temp0[3] = temp1[3]; + temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[4], cospi[8], temp1[7], cosBit); + temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[5], cospi[40], temp1[6], cosBit); + temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[6], -cospi[40], temp1[5], cosBit); + temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[7], -cospi[8], temp1[4], cosBit); + temp0[8] = temp1[8] + temp1[9]; + temp0[9] = -temp1[9] + temp1[8]; + temp0[10] = -temp1[10] + temp1[11]; + temp0[11] = temp1[11] + temp1[10]; + temp0[12] = temp1[12] + temp1[13]; + temp0[13] = -temp1[13] + temp1[12]; + temp0[14] = -temp1[14] + temp1[15]; + temp0[15] = temp1[15] + temp1[14]; + + // stage 6 + temp1[0] = temp0[0]; + temp1[1] = temp0[1]; + temp1[2] = temp0[2]; + temp1[3] = temp0[3]; + temp1[4] = temp0[4]; + temp1[5] = temp0[5]; + temp1[6] = temp0[6]; + temp1[7] = temp0[7]; + temp1[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[8], cospi[4], temp0[15], cosBit); + temp1[9] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[9], cospi[36], temp0[14], cosBit); + temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[10], cospi[20], temp0[13], cosBit); + temp1[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[11], cospi[52], temp0[12], cosBit); + temp1[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[12], -cospi[52], temp0[11], cosBit); + temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[13], -cospi[20],
temp0[10], cosBit); + temp1[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[14], -cospi[36], temp0[9], cosBit); + temp1[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[15], -cospi[4], temp0[8], cosBit); + + // stage 7: copy the results to output in bit-reversed index order. + output = temp1[0]; + Unsafe.Add(ref output, 1) = temp1[8]; + Unsafe.Add(ref output, 2) = temp1[4]; + Unsafe.Add(ref output, 3) = temp1[12]; + Unsafe.Add(ref output, 4) = temp1[2]; + Unsafe.Add(ref output, 5) = temp1[10]; + Unsafe.Add(ref output, 6) = temp1[6]; + Unsafe.Add(ref output, 7) = temp1[14]; + Unsafe.Add(ref output, 8) = temp1[1]; + Unsafe.Add(ref output, 9) = temp1[9]; + Unsafe.Add(ref output, 10) = temp1[5]; + Unsafe.Add(ref output, 11) = temp1[13]; + Unsafe.Add(ref output, 12) = temp1[3]; + Unsafe.Add(ref output, 13) = temp1[11]; + Unsafe.Add(ref output, 14) = temp1[7]; + Unsafe.Add(ref output, 15) = temp1[15]; + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct32Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct32Forward1dTransformer.cs index aa87a0663e..c0068072d3 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct32Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct32Forward1dTransformer.cs @@ -1,10 +1,327 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License.
+using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Dct32Forward1dTransformer : IAv1Forward1dTransformer { public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + => TransformScalar(ref input, ref output, cosBit); + + private static void TransformScalar(ref int input, ref int output, int cosBit) + { + Span temp0 = stackalloc int[32]; + Span temp1 = stackalloc int[32]; + + // Scalar path of the 32-point forward DCT: reads 32 ints starting at input, writes 32 ints starting at output. stage 0: nothing to compute. + + // stage 1: temp0[0..15] are sums and temp0[16..31] are differences of the mirrored input pairs (i, 31-i). + temp0[0] = Unsafe.Add(ref input, 0) + Unsafe.Add(ref input, 31); + temp0[1] = Unsafe.Add(ref input, 1) + Unsafe.Add(ref input, 30); + temp0[2] = Unsafe.Add(ref input, 2) + Unsafe.Add(ref input, 29); + temp0[3] = Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 28); + temp0[4] = Unsafe.Add(ref input, 4) + Unsafe.Add(ref input, 27); + temp0[5] = Unsafe.Add(ref input, 5) + Unsafe.Add(ref input, 26); + temp0[6] = Unsafe.Add(ref input, 6) + Unsafe.Add(ref input, 25); + temp0[7] = Unsafe.Add(ref input, 7) + Unsafe.Add(ref input, 24); + temp0[8] = Unsafe.Add(ref input, 8) + Unsafe.Add(ref input, 23); + temp0[9] = Unsafe.Add(ref input, 9) + Unsafe.Add(ref input, 22); + temp0[10] = Unsafe.Add(ref input, 10) + Unsafe.Add(ref input, 21); + temp0[11] = Unsafe.Add(ref input, 11) + Unsafe.Add(ref input, 20); + temp0[12] = Unsafe.Add(ref input, 12) + Unsafe.Add(ref input, 19); + temp0[13] = Unsafe.Add(ref input, 13) + Unsafe.Add(ref input, 18); + temp0[14] = Unsafe.Add(ref input, 14) + Unsafe.Add(ref input, 17); + temp0[15] = Unsafe.Add(ref input, 15) + Unsafe.Add(ref input, 16); + temp0[16] = -Unsafe.Add(ref input, 16) + Unsafe.Add(ref input, 15); + temp0[17] = -Unsafe.Add(ref input, 17) + Unsafe.Add(ref input, 14); + temp0[18] = -Unsafe.Add(ref input, 18) + Unsafe.Add(ref input, 13); + temp0[19] = -Unsafe.Add(ref input, 19) + Unsafe.Add(ref input, 12); + temp0[20] = -Unsafe.Add(ref input, 20) + Unsafe.Add(ref input, 11); + temp0[21] =
-Unsafe.Add(ref input, 21) + Unsafe.Add(ref input, 10); + temp0[22] = -Unsafe.Add(ref input, 22) + Unsafe.Add(ref input, 9); + temp0[23] = -Unsafe.Add(ref input, 23) + Unsafe.Add(ref input, 8); + temp0[24] = -Unsafe.Add(ref input, 24) + Unsafe.Add(ref input, 7); + temp0[25] = -Unsafe.Add(ref input, 25) + Unsafe.Add(ref input, 6); + temp0[26] = -Unsafe.Add(ref input, 26) + Unsafe.Add(ref input, 5); + temp0[27] = -Unsafe.Add(ref input, 27) + Unsafe.Add(ref input, 4); + temp0[28] = -Unsafe.Add(ref input, 28) + Unsafe.Add(ref input, 3); + temp0[29] = -Unsafe.Add(ref input, 29) + Unsafe.Add(ref input, 2); + temp0[30] = -Unsafe.Add(ref input, 30) + Unsafe.Add(ref input, 1); + temp0[31] = -Unsafe.Add(ref input, 31) + Unsafe.Add(ref input, 0); + + // stage 2 + Span cospi = Av1SinusConstants.CosinusPi(cosBit); + temp1[0] = temp0[0] + temp0[15]; + temp1[1] = temp0[1] + temp0[14]; + temp1[2] = temp0[2] + temp0[13]; + temp1[3] = temp0[3] + temp0[12]; + temp1[4] = temp0[4] + temp0[11]; + temp1[5] = temp0[5] + temp0[10]; + temp1[6] = temp0[6] + temp0[9]; + temp1[7] = temp0[7] + temp0[8]; + temp1[8] = -temp0[8] + temp0[7]; + temp1[9] = -temp0[9] + temp0[6]; + temp1[10] = -temp0[10] + temp0[5]; + temp1[11] = -temp0[11] + temp0[4]; + temp1[12] = -temp0[12] + temp0[3]; + temp1[13] = -temp0[13] + temp0[2]; + temp1[14] = -temp0[14] + temp0[1]; + temp1[15] = -temp0[15] + temp0[0]; + temp1[16] = temp0[16]; + temp1[17] = temp0[17]; + temp1[18] = temp0[18]; + temp1[19] = temp0[19]; + temp1[20] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[20], cospi[32], temp0[27], cosBit); + temp1[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[21], cospi[32], temp0[26], cosBit); + temp1[22] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[22], cospi[32], temp0[25], cosBit); + temp1[23] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[23], cospi[32], temp0[24], cosBit); + temp1[24] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32],
temp0[24], cospi[32], temp0[23], cosBit); + temp1[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[25], cospi[32], temp0[22], cosBit); + temp1[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[26], cospi[32], temp0[21], cosBit); + temp1[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[27], cospi[32], temp0[20], cosBit); + temp1[28] = temp0[28]; + temp1[29] = temp0[29]; + temp1[30] = temp0[30]; + temp1[31] = temp0[31]; + + // stage 3 + temp0[0] = temp1[0] + temp1[7]; + temp0[1] = temp1[1] + temp1[6]; + temp0[2] = temp1[2] + temp1[5]; + temp0[3] = temp1[3] + temp1[4]; + temp0[4] = -temp1[4] + temp1[3]; + temp0[5] = -temp1[5] + temp1[2]; + temp0[6] = -temp1[6] + temp1[1]; + temp0[7] = -temp1[7] + temp1[0]; + temp0[8] = temp1[8]; + temp0[9] = temp1[9]; + temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[10], cospi[32], temp1[13], cosBit); + temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[11], cospi[32], temp1[12], cosBit); + temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[12], cospi[32], temp1[11], cosBit); + temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[13], cospi[32], temp1[10], cosBit); + temp0[14] = temp1[14]; + temp0[15] = temp1[15]; + temp0[16] = temp1[16] + temp1[23]; + temp0[17] = temp1[17] + temp1[22]; + temp0[18] = temp1[18] + temp1[21]; + temp0[19] = temp1[19] + temp1[20]; + temp0[20] = -temp1[20] + temp1[19]; + temp0[21] = -temp1[21] + temp1[18]; + temp0[22] = -temp1[22] + temp1[17]; + temp0[23] = -temp1[23] + temp1[16]; + temp0[24] = -temp1[24] + temp1[31]; + temp0[25] = -temp1[25] + temp1[30]; + temp0[26] = -temp1[26] + temp1[29]; + temp0[27] = -temp1[27] + temp1[28]; + temp0[28] = temp1[28] + temp1[27]; + temp0[29] = temp1[29] + temp1[26]; + temp0[30] = temp1[30] + temp1[25]; + temp0[31] = temp1[31] + temp1[24]; + + // stage 4 + temp1[0] = temp0[0] + temp0[3]; + temp1[1] = temp0[1] + temp0[2]; + temp1[2] = -temp0[2]
+ temp0[1]; + temp1[3] = -temp0[3] + temp0[0]; + temp1[4] = temp0[4]; + temp1[5] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[5], cospi[32], temp0[6], cosBit); + temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[6], cospi[32], temp0[5], cosBit); + temp1[7] = temp0[7]; + temp1[8] = temp0[8] + temp0[11]; + temp1[9] = temp0[9] + temp0[10]; + temp1[10] = -temp0[10] + temp0[9]; + temp1[11] = -temp0[11] + temp0[8]; + temp1[12] = -temp0[12] + temp0[15]; + temp1[13] = -temp0[13] + temp0[14]; + temp1[14] = temp0[14] + temp0[13]; + temp1[15] = temp0[15] + temp0[12]; + temp1[16] = temp0[16]; + temp1[17] = temp0[17]; + temp1[18] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[18], cospi[48], temp0[29], cosBit); + temp1[19] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[19], cospi[48], temp0[28], cosBit); + temp1[20] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[20], -cospi[16], temp0[27], cosBit); + temp1[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[21], -cospi[16], temp0[26], cosBit); + temp1[22] = temp0[22]; + temp1[23] = temp0[23]; + temp1[24] = temp0[24]; + temp1[25] = temp0[25]; + temp1[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[26], -cospi[16], temp0[21], cosBit); + temp1[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[27], -cospi[16], temp0[20], cosBit); + temp1[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[28], cospi[48], temp0[19], cosBit); + temp1[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[29], cospi[48], temp0[18], cosBit); + temp1[30] = temp0[30]; + temp1[31] = temp0[31]; + + // stage 5 + temp0[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[0], cospi[32], temp1[1], cosBit); + temp0[1] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[1], cospi[32], temp1[0], cosBit); + temp0[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[2], cospi[16],
temp1[3], cosBit); + temp0[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[3], -cospi[16], temp1[2], cosBit); + temp0[4] = temp1[4] + temp1[5]; + temp0[5] = -temp1[5] + temp1[4]; + temp0[6] = -temp1[6] + temp1[7]; + temp0[7] = temp1[7] + temp1[6]; + temp0[8] = temp1[8]; + temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[9], cospi[48], temp1[14], cosBit); + temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[10], -cospi[16], temp1[13], cosBit); + temp0[11] = temp1[11]; + temp0[12] = temp1[12]; + temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[13], -cospi[16], temp1[10], cosBit); + temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[14], cospi[48], temp1[9], cosBit); + temp0[15] = temp1[15]; + temp0[16] = temp1[16] + temp1[19]; + temp0[17] = temp1[17] + temp1[18]; + temp0[18] = -temp1[18] + temp1[17]; + temp0[19] = -temp1[19] + temp1[16]; + temp0[20] = -temp1[20] + temp1[23]; + temp0[21] = -temp1[21] + temp1[22]; + temp0[22] = temp1[22] + temp1[21]; + temp0[23] = temp1[23] + temp1[20]; + temp0[24] = temp1[24] + temp1[27]; + temp0[25] = temp1[25] + temp1[26]; + temp0[26] = -temp1[26] + temp1[25]; + temp0[27] = -temp1[27] + temp1[24]; + temp0[28] = -temp1[28] + temp1[31]; + temp0[29] = -temp1[29] + temp1[30]; + temp0[30] = temp1[30] + temp1[29]; + temp0[31] = temp1[31] + temp1[28]; + + // stage 6 + temp1[0] = temp0[0]; + temp1[1] = temp0[1]; + temp1[2] = temp0[2]; + temp1[3] = temp0[3]; + temp1[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[4], cospi[8], temp0[7], cosBit); + temp1[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[5], cospi[40], temp0[6], cosBit); + temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[6], -cospi[40], temp0[5], cosBit); + temp1[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[7], -cospi[8], temp0[4], cosBit); + temp1[8] = temp0[8] + temp0[9]; + temp1[9] = -temp0[9] + temp0[8];
+ temp1[10] = -temp0[10] + temp0[11]; + temp1[11] = temp0[11] + temp0[10]; + temp1[12] = temp0[12] + temp0[13]; + temp1[13] = -temp0[13] + temp0[12]; + temp1[14] = -temp0[14] + temp0[15]; + temp1[15] = temp0[15] + temp0[14]; + temp1[16] = temp0[16]; + temp1[17] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp0[17], cospi[56], temp0[30], cosBit); + temp1[18] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp0[18], -cospi[8], temp0[29], cosBit); + temp1[19] = temp0[19]; + temp1[20] = temp0[20]; + temp1[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp0[21], cospi[24], temp0[26], cosBit); + temp1[22] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp0[22], -cospi[40], temp0[25], cosBit); + temp1[23] = temp0[23]; + temp1[24] = temp0[24]; + temp1[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[25], -cospi[40], temp0[22], cosBit); + temp1[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp0[26], cospi[24], temp0[21], cosBit); + temp1[27] = temp0[27]; + temp1[28] = temp0[28]; + temp1[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[29], -cospi[8], temp0[18], cosBit); + temp1[30] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp0[30], cospi[56], temp0[17], cosBit); + temp1[31] = temp0[31]; + + // stage 7 + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + temp0[2] = temp1[2]; + temp0[3] = temp1[3]; + temp0[4] = temp1[4]; + temp0[5] = temp1[5]; + temp0[6] = temp1[6]; + temp0[7] = temp1[7]; + temp0[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp1[8], cospi[4], temp1[15], cosBit); + temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp1[9], cospi[36], temp1[14], cosBit); + temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp1[10], cospi[20], temp1[13], cosBit); + temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp1[11], cospi[52], temp1[12], cosBit); + temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12],
temp1[12], -cospi[52], temp1[11], cosBit); + temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp1[13], -cospi[20], temp1[10], cosBit); + temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp1[14], -cospi[36], temp1[9], cosBit); + temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp1[15], -cospi[4], temp1[8], cosBit); + temp0[16] = temp1[16] + temp1[17]; + temp0[17] = -temp1[17] + temp1[16]; + temp0[18] = -temp1[18] + temp1[19]; + temp0[19] = temp1[19] + temp1[18]; + temp0[20] = temp1[20] + temp1[21]; + temp0[21] = -temp1[21] + temp1[20]; + temp0[22] = -temp1[22] + temp1[23]; + temp0[23] = temp1[23] + temp1[22]; + temp0[24] = temp1[24] + temp1[25]; + temp0[25] = -temp1[25] + temp1[24]; + temp0[26] = -temp1[26] + temp1[27]; + temp0[27] = temp1[27] + temp1[26]; + temp0[28] = temp1[28] + temp1[29]; + temp0[29] = -temp1[29] + temp1[28]; + temp0[30] = -temp1[30] + temp1[31]; + temp0[31] = temp1[31] + temp1[30]; + + // stage 8 + temp1[0] = temp0[0]; + temp1[1] = temp0[1]; + temp1[2] = temp0[2]; + temp1[3] = temp0[3]; + temp1[4] = temp0[4]; + temp1[5] = temp0[5]; + temp1[6] = temp0[6]; + temp1[7] = temp0[7]; + temp1[8] = temp0[8]; + temp1[9] = temp0[9]; + temp1[10] = temp0[10]; + temp1[11] = temp0[11]; + temp1[12] = temp0[12]; + temp1[13] = temp0[13]; + temp1[14] = temp0[14]; + temp1[15] = temp0[15]; + temp1[16] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[62], temp0[16], cospi[2], temp0[31], cosBit); + temp1[17] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[30], temp0[17], cospi[34], temp0[30], cosBit); + temp1[18] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[46], temp0[18], cospi[18], temp0[29], cosBit); + temp1[19] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[14], temp0[19], cospi[50], temp0[28], cosBit); + temp1[20] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[54], temp0[20], cospi[10], temp0[27], cosBit); + temp1[21] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[22], temp0[21], cospi[42],
temp0[26], cosBit); + temp1[22] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[38], temp0[22], cospi[26], temp0[25], cosBit); + temp1[23] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[6], temp0[23], cospi[58], temp0[24], cosBit); + temp1[24] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[6], temp0[24], -cospi[58], temp0[23], cosBit); + temp1[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[38], temp0[25], -cospi[26], temp0[22], cosBit); + temp1[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[22], temp0[26], -cospi[42], temp0[21], cosBit); + temp1[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[54], temp0[27], -cospi[10], temp0[20], cosBit); + temp1[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[14], temp0[28], -cospi[50], temp0[19], cosBit); + temp1[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[46], temp0[29], -cospi[18], temp0[18], cosBit); + temp1[30] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[30], temp0[30], -cospi[34], temp0[17], cosBit); + temp1[31] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[62], temp0[31], -cospi[2], temp0[16], cosBit); + + // stage 9: copy the results to output in bit-reversed index order. + Unsafe.Add(ref output, 0) = temp1[0]; + Unsafe.Add(ref output, 1) = temp1[16]; + Unsafe.Add(ref output, 2) = temp1[8]; + Unsafe.Add(ref output, 3) = temp1[24]; + Unsafe.Add(ref output, 4) = temp1[4]; + Unsafe.Add(ref output, 5) = temp1[20]; + Unsafe.Add(ref output, 6) = temp1[12]; + Unsafe.Add(ref output, 7) = temp1[28]; + Unsafe.Add(ref output, 8) = temp1[2]; + Unsafe.Add(ref output, 9) = temp1[18]; + Unsafe.Add(ref output, 10) = temp1[10]; + Unsafe.Add(ref output, 11) = temp1[26]; + Unsafe.Add(ref output, 12) = temp1[6]; + Unsafe.Add(ref output, 13) = temp1[22]; + Unsafe.Add(ref output, 14) = temp1[14]; + Unsafe.Add(ref output, 15) = temp1[30]; + Unsafe.Add(ref output, 16) = temp1[1]; + Unsafe.Add(ref output, 17) = temp1[17]; + Unsafe.Add(ref output, 18) = temp1[9]; + Unsafe.Add(ref output, 19) = temp1[25]; + Unsafe.Add(ref output, 20) = temp1[5]; +
Unsafe.Add(ref output, 21) = temp1[21]; + Unsafe.Add(ref output, 22) = temp1[13]; + Unsafe.Add(ref output, 23) = temp1[29]; + Unsafe.Add(ref output, 24) = temp1[3]; + Unsafe.Add(ref output, 25) = temp1[19]; + Unsafe.Add(ref output, 26) = temp1[11]; + Unsafe.Add(ref output, 27) = temp1[27]; + Unsafe.Add(ref output, 28) = temp1[7]; + Unsafe.Add(ref output, 29) = temp1[23]; + Unsafe.Add(ref output, 30) = temp1[15]; + Unsafe.Add(ref output, 31) = temp1[31]; + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct4Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct4Forward1dTransformer.cs index 15c3f3ffe9..41a5234594 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct4Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct4Forward1dTransformer.cs @@ -33,10 +33,10 @@ internal class Av1Dct4Forward1dTransformer : IAv1Forward1dTransformer output3 = -Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 0); // stage 2 - step0 = HalfBtf(cospi[32], output, cospi[32], output1, cosBit); - step1 = HalfBtf(-cospi[32], output1, cospi[32], output, cosBit); - step2 = HalfBtf(cospi[48], output2, cospi[16], output3, cosBit); - step3 = HalfBtf(cospi[48], output3, -cospi[16], output2, cosBit); + step0 = HalfButterfly(cospi[32], output, cospi[32], output1, cosBit); + step1 = HalfButterfly(-cospi[32], output1, cospi[32], output, cosBit); + step2 = HalfButterfly(cospi[48], output2, cospi[16], output3, cosBit); + step3 = HalfButterfly(cospi[48], output3, -cospi[16], output2, cosBit); // stage 3 output = step0; @@ -45,7 +45,7 @@ internal class Av1Dct4Forward1dTransformer : IAv1Forward1dTransformer output3 = step3; } - private static int HalfBtf(int w0, int in0, int w1, int in1, int bit) + internal static int HalfButterfly(int w0, int in0, int w1, int in1, int bit) { long result64 = (long)(w0 * in0) + (w1 * in1); long intermediate = result64 + (1L << (bit - 1)); diff --git
a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct64Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct64Forward1dTransformer.cs index 5dcc4ab7f0..c9149f2973 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct64Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct64Forward1dTransformer.cs @@ -1,10 +1,747 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Dct64Forward1dTransformer : IAv1Forward1dTransformer { public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + => TransforScalar(ref input, ref output, cosBit); + + private static void TransforScalar(ref int input, ref int output, int cosBit) + { + Span temp0 = stackalloc int[64]; + Span temp1 = stackalloc int[64]; + + // stage 0; + + // stage 1; + temp0[0] = Unsafe.Add(ref input, 0) + Unsafe.Add(ref input, 63); + temp0[1] = Unsafe.Add(ref input, 1) + Unsafe.Add(ref input, 62); + temp0[2] = Unsafe.Add(ref input, 2) + Unsafe.Add(ref input, 61); + temp0[3] = Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 60); + temp0[4] = Unsafe.Add(ref input, 4) + Unsafe.Add(ref input, 59); + temp0[5] = Unsafe.Add(ref input, 5) + Unsafe.Add(ref input, 58); + temp0[6] = Unsafe.Add(ref input, 6) + Unsafe.Add(ref input, 57); + temp0[7] = Unsafe.Add(ref input, 7) + Unsafe.Add(ref input, 56); + temp0[8] = Unsafe.Add(ref input, 8) + Unsafe.Add(ref input, 55); + temp0[9] = Unsafe.Add(ref input, 9) + Unsafe.Add(ref input, 54); + temp0[10] = Unsafe.Add(ref input, 10) + Unsafe.Add(ref input, 53); + temp0[11] = Unsafe.Add(ref input, 11) + Unsafe.Add(ref input, 52); + temp0[12] = Unsafe.Add(ref input, 12) + Unsafe.Add(ref input, 51); + temp0[13] = Unsafe.Add(ref input, 13) + Unsafe.Add(ref input, 50); + temp0[14] = Unsafe.Add(ref 
input, 14) + Unsafe.Add(ref input, 49); + temp0[15] = Unsafe.Add(ref input, 15) + Unsafe.Add(ref input, 48); + temp0[16] = Unsafe.Add(ref input, 16) + Unsafe.Add(ref input, 47); + temp0[17] = Unsafe.Add(ref input, 17) + Unsafe.Add(ref input, 46); + temp0[18] = Unsafe.Add(ref input, 18) + Unsafe.Add(ref input, 45); + temp0[19] = Unsafe.Add(ref input, 19) + Unsafe.Add(ref input, 44); + temp0[20] = Unsafe.Add(ref input, 20) + Unsafe.Add(ref input, 43); + temp0[21] = Unsafe.Add(ref input, 21) + Unsafe.Add(ref input, 42); + temp0[22] = Unsafe.Add(ref input, 22) + Unsafe.Add(ref input, 41); + temp0[23] = Unsafe.Add(ref input, 23) + Unsafe.Add(ref input, 40); + temp0[24] = Unsafe.Add(ref input, 24) + Unsafe.Add(ref input, 39); + temp0[25] = Unsafe.Add(ref input, 25) + Unsafe.Add(ref input, 38); + temp0[26] = Unsafe.Add(ref input, 26) + Unsafe.Add(ref input, 37); + temp0[27] = Unsafe.Add(ref input, 27) + Unsafe.Add(ref input, 36); + temp0[28] = Unsafe.Add(ref input, 28) + Unsafe.Add(ref input, 35); + temp0[29] = Unsafe.Add(ref input, 29) + Unsafe.Add(ref input, 34); + temp0[30] = Unsafe.Add(ref input, 30) + Unsafe.Add(ref input, 33); + temp0[31] = Unsafe.Add(ref input, 31) + Unsafe.Add(ref input, 32); + temp0[32] = -Unsafe.Add(ref input, 32) + Unsafe.Add(ref input, 31); + temp0[33] = -Unsafe.Add(ref input, 33) + Unsafe.Add(ref input, 30); + temp0[34] = -Unsafe.Add(ref input, 34) + Unsafe.Add(ref input, 29); + temp0[35] = -Unsafe.Add(ref input, 35) + Unsafe.Add(ref input, 28); + temp0[36] = -Unsafe.Add(ref input, 36) + Unsafe.Add(ref input, 27); + temp0[37] = -Unsafe.Add(ref input, 37) + Unsafe.Add(ref input, 26); + temp0[38] = -Unsafe.Add(ref input, 38) + Unsafe.Add(ref input, 25); + temp0[39] = -Unsafe.Add(ref input, 39) + Unsafe.Add(ref input, 24); + temp0[40] = -Unsafe.Add(ref input, 40) + Unsafe.Add(ref input, 23); + temp0[41] = -Unsafe.Add(ref input, 41) + Unsafe.Add(ref input, 22); + temp0[42] = -Unsafe.Add(ref input, 42) + Unsafe.Add(ref input, 21); + temp0[43] = 
-Unsafe.Add(ref input, 43) + Unsafe.Add(ref input, 20); + temp0[44] = -Unsafe.Add(ref input, 44) + Unsafe.Add(ref input, 19); + temp0[45] = -Unsafe.Add(ref input, 45) + Unsafe.Add(ref input, 18); + temp0[46] = -Unsafe.Add(ref input, 46) + Unsafe.Add(ref input, 17); + temp0[47] = -Unsafe.Add(ref input, 47) + Unsafe.Add(ref input, 16); + temp0[48] = -Unsafe.Add(ref input, 48) + Unsafe.Add(ref input, 15); + temp0[49] = -Unsafe.Add(ref input, 49) + Unsafe.Add(ref input, 14); + temp0[50] = -Unsafe.Add(ref input, 50) + Unsafe.Add(ref input, 13); + temp0[51] = -Unsafe.Add(ref input, 51) + Unsafe.Add(ref input, 12); + temp0[52] = -Unsafe.Add(ref input, 52) + Unsafe.Add(ref input, 11); + temp0[53] = -Unsafe.Add(ref input, 53) + Unsafe.Add(ref input, 10); + temp0[54] = -Unsafe.Add(ref input, 54) + Unsafe.Add(ref input, 9); + temp0[55] = -Unsafe.Add(ref input, 55) + Unsafe.Add(ref input, 8); + temp0[56] = -Unsafe.Add(ref input, 56) + Unsafe.Add(ref input, 7); + temp0[57] = -Unsafe.Add(ref input, 57) + Unsafe.Add(ref input, 6); + temp0[58] = -Unsafe.Add(ref input, 58) + Unsafe.Add(ref input, 5); + temp0[59] = -Unsafe.Add(ref input, 59) + Unsafe.Add(ref input, 4); + temp0[60] = -Unsafe.Add(ref input, 60) + Unsafe.Add(ref input, 3); + temp0[61] = -Unsafe.Add(ref input, 61) + Unsafe.Add(ref input, 2); + temp0[62] = -Unsafe.Add(ref input, 62) + Unsafe.Add(ref input, 1); + temp0[63] = -Unsafe.Add(ref input, 63) + Unsafe.Add(ref input, 0); + + // stage 2 + Span cospi = Av1SinusConstants.CosinusPi(cosBit); + temp1[0] = temp0[0] + temp0[31]; + temp1[1] = temp0[1] + temp0[30]; + temp1[2] = temp0[2] + temp0[29]; + temp1[3] = temp0[3] + temp0[28]; + temp1[4] = temp0[4] + temp0[27]; + temp1[5] = temp0[5] + temp0[26]; + temp1[6] = temp0[6] + temp0[25]; + temp1[7] = temp0[7] + temp0[24]; + temp1[8] = temp0[8] + temp0[23]; + temp1[9] = temp0[9] + temp0[22]; + temp1[10] = temp0[10] + temp0[21]; + temp1[11] = temp0[11] + temp0[20]; + temp1[12] = temp0[12] + temp0[19]; + temp1[13] = temp0[13] + 
temp0[18]; + temp1[14] = temp0[14] + temp0[17]; + temp1[15] = temp0[15] + temp0[16]; + temp1[16] = -temp0[16] + temp0[15]; + temp1[17] = -temp0[17] + temp0[14]; + temp1[18] = -temp0[18] + temp0[13]; + temp1[19] = -temp0[19] + temp0[12]; + temp1[20] = -temp0[20] + temp0[11]; + temp1[21] = -temp0[21] + temp0[10]; + temp1[22] = -temp0[22] + temp0[9]; + temp1[23] = -temp0[23] + temp0[8]; + temp1[24] = -temp0[24] + temp0[7]; + temp1[25] = -temp0[25] + temp0[6]; + temp1[26] = -temp0[26] + temp0[5]; + temp1[27] = -temp0[27] + temp0[4]; + temp1[28] = -temp0[28] + temp0[3]; + temp1[29] = -temp0[29] + temp0[2]; + temp1[30] = -temp0[30] + temp0[1]; + temp1[31] = -temp0[31] + temp0[0]; + temp1[32] = temp0[32]; + temp1[33] = temp0[33]; + temp1[34] = temp0[34]; + temp1[35] = temp0[35]; + temp1[36] = temp0[36]; + temp1[37] = temp0[37]; + temp1[38] = temp0[38]; + temp1[39] = temp0[39]; + temp1[40] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[40], cospi[32], temp0[55], cosBit); + temp1[41] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[41], cospi[32], temp0[54], cosBit); + temp1[42] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[42], cospi[32], temp0[53], cosBit); + temp1[43] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[43], cospi[32], temp0[52], cosBit); + temp1[44] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[44], cospi[32], temp0[51], cosBit); + temp1[45] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[45], cospi[32], temp0[50], cosBit); + temp1[46] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[46], cospi[32], temp0[49], cosBit); + temp1[47] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[47], cospi[32], temp0[48], cosBit); + temp1[48] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[48], cospi[32], temp0[47], cosBit); + temp1[49] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[49], cospi[32], temp0[46], cosBit); + temp1[50] = 
Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[50], cospi[32], temp0[45], cosBit); + temp1[51] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[51], cospi[32], temp0[44], cosBit); + temp1[52] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[52], cospi[32], temp0[43], cosBit); + temp1[53] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[53], cospi[32], temp0[42], cosBit); + temp1[54] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[54], cospi[32], temp0[41], cosBit); + temp1[55] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[55], cospi[32], temp0[40], cosBit); + temp1[56] = temp0[56]; + temp1[57] = temp0[57]; + temp1[58] = temp0[58]; + temp1[59] = temp0[59]; + temp1[60] = temp0[60]; + temp1[61] = temp0[61]; + temp1[62] = temp0[62]; + temp1[63] = temp0[63]; + + // stage 3 + temp0[0] = temp1[0] + temp1[15]; + temp0[1] = temp1[1] + temp1[14]; + temp0[2] = temp1[2] + temp1[13]; + temp0[3] = temp1[3] + temp1[12]; + temp0[4] = temp1[4] + temp1[11]; + temp0[5] = temp1[5] + temp1[10]; + temp0[6] = temp1[6] + temp1[9]; + temp0[7] = temp1[7] + temp1[8]; + temp0[8] = -temp1[8] + temp1[7]; + temp0[9] = -temp1[9] + temp1[6]; + temp0[10] = -temp1[10] + temp1[5]; + temp0[11] = -temp1[11] + temp1[4]; + temp0[12] = -temp1[12] + temp1[3]; + temp0[13] = -temp1[13] + temp1[2]; + temp0[14] = -temp1[14] + temp1[1]; + temp0[15] = -temp1[15] + temp1[0]; + temp0[16] = temp1[16]; + temp0[17] = temp1[17]; + temp0[18] = temp1[18]; + temp0[19] = temp1[19]; + temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[20], cospi[32], temp1[27], cosBit); + temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[21], cospi[32], temp1[26], cosBit); + temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[22], cospi[32], temp1[25], cosBit); + temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[23], cospi[32], temp1[24], cosBit); + temp0[24] = 
Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[24], cospi[32], temp1[23], cosBit); + temp0[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[25], cospi[32], temp1[22], cosBit); + temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[26], cospi[32], temp1[21], cosBit); + temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[27], cospi[32], temp1[20], cosBit); + temp0[28] = temp1[28]; + temp0[29] = temp1[29]; + temp0[30] = temp1[30]; + temp0[31] = temp1[31]; + temp0[32] = temp1[32] + temp1[47]; + temp0[33] = temp1[33] + temp1[46]; + temp0[34] = temp1[34] + temp1[45]; + temp0[35] = temp1[35] + temp1[44]; + temp0[36] = temp1[36] + temp1[43]; + temp0[37] = temp1[37] + temp1[42]; + temp0[38] = temp1[38] + temp1[41]; + temp0[39] = temp1[39] + temp1[40]; + temp0[40] = -temp1[40] + temp1[39]; + temp0[41] = -temp1[41] + temp1[38]; + temp0[42] = -temp1[42] + temp1[37]; + temp0[43] = -temp1[43] + temp1[36]; + temp0[44] = -temp1[44] + temp1[35]; + temp0[45] = -temp1[45] + temp1[34]; + temp0[46] = -temp1[46] + temp1[33]; + temp0[47] = -temp1[47] + temp1[32]; + temp0[48] = -temp1[48] + temp1[63]; + temp0[49] = -temp1[49] + temp1[62]; + temp0[50] = -temp1[50] + temp1[61]; + temp0[51] = -temp1[51] + temp1[60]; + temp0[52] = -temp1[52] + temp1[59]; + temp0[53] = -temp1[53] + temp1[58]; + temp0[54] = -temp1[54] + temp1[57]; + temp0[55] = -temp1[55] + temp1[56]; + temp0[56] = temp1[56] + temp1[55]; + temp0[57] = temp1[57] + temp1[54]; + temp0[58] = temp1[58] + temp1[53]; + temp0[59] = temp1[59] + temp1[52]; + temp0[60] = temp1[60] + temp1[51]; + temp0[61] = temp1[61] + temp1[50]; + temp0[62] = temp1[62] + temp1[49]; + temp0[63] = temp1[63] + temp1[48]; + + // stage 4 + temp1[0] = temp0[0] + temp0[7]; + temp1[1] = temp0[1] + temp0[6]; + temp1[2] = temp0[2] + temp0[5]; + temp1[3] = temp0[3] + temp0[4]; + temp1[4] = -temp0[4] + temp0[3]; + temp1[5] = -temp0[5] + temp0[2]; + temp1[6] = -temp0[6] + temp0[1]; + temp1[7] = 
-temp0[7] + temp0[0]; + temp1[8] = temp0[8]; + temp1[9] = temp0[9]; + temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[10], cospi[32], temp0[13], cosBit); + temp1[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[11], cospi[32], temp0[12], cosBit); + temp1[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[12], cospi[32], temp0[11], cosBit); + temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[13], cospi[32], temp0[10], cosBit); + temp1[14] = temp0[14]; + temp1[15] = temp0[15]; + temp1[16] = temp0[16] + temp0[23]; + temp1[17] = temp0[17] + temp0[22]; + temp1[18] = temp0[18] + temp0[21]; + temp1[19] = temp0[19] + temp0[20]; + temp1[20] = -temp0[20] + temp0[19]; + temp1[21] = -temp0[21] + temp0[18]; + temp1[22] = -temp0[22] + temp0[17]; + temp1[23] = -temp0[23] + temp0[16]; + temp1[24] = -temp0[24] + temp0[31]; + temp1[25] = -temp0[25] + temp0[30]; + temp1[26] = -temp0[26] + temp0[29]; + temp1[27] = -temp0[27] + temp0[28]; + temp1[28] = temp0[28] + temp0[27]; + temp1[29] = temp0[29] + temp0[26]; + temp1[30] = temp0[30] + temp0[25]; + temp1[31] = temp0[31] + temp0[24]; + temp1[32] = temp0[32]; + temp1[33] = temp0[33]; + temp1[34] = temp0[34]; + temp1[35] = temp0[35]; + temp1[36] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[36], cospi[48], temp0[59], cosBit); + temp1[37] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[37], cospi[48], temp0[58], cosBit); + temp1[38] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[38], cospi[48], temp0[57], cosBit); + temp1[39] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[39], cospi[48], temp0[56], cosBit); + temp1[40] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[40], -cospi[16], temp0[55], cosBit); + temp1[41] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[41], -cospi[16], temp0[54], cosBit); + temp1[42] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[42], 
-cospi[16], temp0[53], cosBit); + temp1[43] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[43], -cospi[16], temp0[52], cosBit); + temp1[44] = temp0[44]; + temp1[45] = temp0[45]; + temp1[46] = temp0[46]; + temp1[47] = temp0[47]; + temp1[48] = temp0[48]; + temp1[49] = temp0[49]; + temp1[50] = temp0[50]; + temp1[51] = temp0[51]; + temp1[52] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[52], -cospi[16], temp0[43], cosBit); + temp1[53] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[53], -cospi[16], temp0[42], cosBit); + temp1[54] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[54], -cospi[16], temp0[41], cosBit); + temp1[55] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[55], -cospi[16], temp0[40], cosBit); + temp1[56] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[56], cospi[48], temp0[39], cosBit); + temp1[57] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[57], cospi[48], temp0[38], cosBit); + temp1[58] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[58], cospi[48], temp0[37], cosBit); + temp1[59] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[59], cospi[48], temp0[36], cosBit); + temp1[60] = temp0[60]; + temp1[61] = temp0[61]; + temp1[62] = temp0[62]; + temp1[63] = temp0[63]; + + // stage 5 + temp0[0] = temp1[0] + temp1[3]; + temp0[1] = temp1[1] + temp1[2]; + temp0[2] = -temp1[2] + temp1[1]; + temp0[3] = -temp1[3] + temp1[0]; + temp0[4] = temp1[4]; + temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[5], cospi[32], temp1[6], cosBit); + temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[6], cospi[32], temp1[5], cosBit); + temp0[7] = temp1[7]; + temp0[8] = temp1[8] + temp1[11]; + temp0[9] = temp1[9] + temp1[10]; + temp0[10] = -temp1[10] + temp1[9]; + temp0[11] = -temp1[11] + temp1[8]; + temp0[12] = -temp1[12] + temp1[15]; + temp0[13] = -temp1[13] + temp1[14]; + temp0[14] = temp1[14] + temp1[13]; + temp0[15] 
= temp1[15] + temp1[12]; + temp0[16] = temp1[16]; + temp0[17] = temp1[17]; + temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[18], cospi[48], temp1[29], cosBit); + temp0[19] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[19], cospi[48], temp1[28], cosBit); + temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[20], -cospi[16], temp1[27], cosBit); + temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[21], -cospi[16], temp1[26], cosBit); + temp0[22] = temp1[22]; + temp0[23] = temp1[23]; + temp0[24] = temp1[24]; + temp0[25] = temp1[25]; + temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[26], -cospi[16], temp1[21], cosBit); + temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[27], -cospi[16], temp1[20], cosBit); + temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[28], cospi[48], temp1[19], cosBit); + temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[29], cospi[48], temp1[18], cosBit); + temp0[30] = temp1[30]; + temp0[31] = temp1[31]; + temp0[32] = temp1[32] + temp1[39]; + temp0[33] = temp1[33] + temp1[38]; + temp0[34] = temp1[34] + temp1[37]; + temp0[35] = temp1[35] + temp1[36]; + temp0[36] = -temp1[36] + temp1[35]; + temp0[37] = -temp1[37] + temp1[34]; + temp0[38] = -temp1[38] + temp1[33]; + temp0[39] = -temp1[39] + temp1[32]; + temp0[40] = -temp1[40] + temp1[47]; + temp0[41] = -temp1[41] + temp1[46]; + temp0[42] = -temp1[42] + temp1[45]; + temp0[43] = -temp1[43] + temp1[44]; + temp0[44] = temp1[44] + temp1[43]; + temp0[45] = temp1[45] + temp1[42]; + temp0[46] = temp1[46] + temp1[41]; + temp0[47] = temp1[47] + temp1[40]; + temp0[48] = temp1[48] + temp1[55]; + temp0[49] = temp1[49] + temp1[54]; + temp0[50] = temp1[50] + temp1[53]; + temp0[51] = temp1[51] + temp1[52]; + temp0[52] = -temp1[52] + temp1[51]; + temp0[53] = -temp1[53] + temp1[50]; + temp0[54] = -temp1[54] + temp1[49]; + temp0[55] = -temp1[55] + 
temp1[48]; + temp0[56] = -temp1[56] + temp1[63]; + temp0[57] = -temp1[57] + temp1[62]; + temp0[58] = -temp1[58] + temp1[61]; + temp0[59] = -temp1[59] + temp1[60]; + temp0[60] = temp1[60] + temp1[59]; + temp0[61] = temp1[61] + temp1[58]; + temp0[62] = temp1[62] + temp1[57]; + temp0[63] = temp1[63] + temp1[56]; + + // stage 6 + temp1[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[0], cospi[32], temp0[1], cosBit); + temp1[1] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[1], cospi[32], temp0[0], cosBit); + temp1[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[2], cospi[16], temp0[3], cosBit); + temp1[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[3], -cospi[16], temp0[2], cosBit); + temp1[4] = temp0[4] + temp0[5]; + temp1[5] = -temp0[5] + temp0[4]; + temp1[6] = -temp0[6] + temp0[7]; + temp1[7] = temp0[7] + temp0[6]; + temp1[8] = temp0[8]; + temp1[9] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[9], cospi[48], temp0[14], cosBit); + temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[10], -cospi[16], temp0[13], cosBit); + temp1[11] = temp0[11]; + temp1[12] = temp0[12]; + temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[13], -cospi[16], temp0[10], cosBit); + temp1[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[14], cospi[48], temp0[9], cosBit); + temp1[15] = temp0[15]; + temp1[16] = temp0[16] + temp0[19]; + temp1[17] = temp0[17] + temp0[18]; + temp1[18] = -temp0[18] + temp0[17]; + temp1[19] = -temp0[19] + temp0[16]; + temp1[20] = -temp0[20] + temp0[23]; + temp1[21] = -temp0[21] + temp0[22]; + temp1[22] = temp0[22] + temp0[21]; + temp1[23] = temp0[23] + temp0[20]; + temp1[24] = temp0[24] + temp0[27]; + temp1[25] = temp0[25] + temp0[26]; + temp1[26] = -temp0[26] + temp0[25]; + temp1[27] = -temp0[27] + temp0[24]; + temp1[28] = -temp0[28] + temp0[31]; + temp1[29] = -temp0[29] + temp0[30]; + temp1[30] = temp0[30] + temp0[29]; + temp1[31] 
= temp0[31] + temp0[28]; + temp1[32] = temp0[32]; + temp1[33] = temp0[33]; + temp1[34] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp0[34], cospi[56], temp0[61], cosBit); + temp1[35] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp0[35], cospi[56], temp0[60], cosBit); + temp1[36] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp0[36], -cospi[8], temp0[59], cosBit); + temp1[37] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp0[37], -cospi[8], temp0[58], cosBit); + temp1[38] = temp0[38]; + temp1[39] = temp0[39]; + temp1[40] = temp0[40]; + temp1[41] = temp0[41]; + temp1[42] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp0[42], cospi[24], temp0[53], cosBit); + temp1[43] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp0[43], cospi[24], temp0[52], cosBit); + temp1[44] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp0[44], -cospi[40], temp0[51], cosBit); + temp1[45] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp0[45], -cospi[40], temp0[50], cosBit); + temp1[46] = temp0[46]; + temp1[47] = temp0[47]; + temp1[48] = temp0[48]; + temp1[49] = temp0[49]; + temp1[50] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[50], -cospi[40], temp0[45], cosBit); + temp1[51] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[51], -cospi[40], temp0[44], cosBit); + temp1[52] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp0[52], cospi[24], temp0[43], cosBit); + temp1[53] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp0[53], cospi[24], temp0[42], cosBit); + temp1[54] = temp0[54]; + temp1[55] = temp0[55]; + temp1[56] = temp0[56]; + temp1[57] = temp0[57]; + temp1[58] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[58], -cospi[8], temp0[37], cosBit); + temp1[59] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[59], -cospi[8], temp0[36], cosBit); + temp1[60] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp0[60], cospi[56], 
temp0[35], cosBit); + temp1[61] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp0[61], cospi[56], temp0[34], cosBit); + temp1[62] = temp0[62]; + temp1[63] = temp0[63]; + + // stage 7 + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + temp0[2] = temp1[2]; + temp0[3] = temp1[3]; + temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[4], cospi[8], temp1[7], cosBit); + temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[5], cospi[40], temp1[6], cosBit); + temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[6], -cospi[40], temp1[5], cosBit); + temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[7], -cospi[8], temp1[4], cosBit); + temp0[8] = temp1[8] + temp1[9]; + temp0[9] = -temp1[9] + temp1[8]; + temp0[10] = -temp1[10] + temp1[11]; + temp0[11] = temp1[11] + temp1[10]; + temp0[12] = temp1[12] + temp1[13]; + temp0[13] = -temp1[13] + temp1[12]; + temp0[14] = -temp1[14] + temp1[15]; + temp0[15] = temp1[15] + temp1[14]; + temp0[16] = temp1[16]; + temp0[17] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp1[17], cospi[56], temp1[30], cosBit); + temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp1[18], -cospi[8], temp1[29], cosBit); + temp0[19] = temp1[19]; + temp0[20] = temp1[20]; + temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp1[21], cospi[24], temp1[26], cosBit); + temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp1[22], -cospi[40], temp1[25], cosBit); + temp0[23] = temp1[23]; + temp0[24] = temp1[24]; + temp0[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[25], -cospi[40], temp1[22], cosBit); + temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp1[26], cospi[24], temp1[21], cosBit); + temp0[27] = temp1[27]; + temp0[28] = temp1[28]; + temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[29], -cospi[8], temp1[18], cosBit); + temp0[30] = 
Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp1[30], cospi[56], temp1[17], cosBit); + temp0[31] = temp1[31]; + temp0[32] = temp1[32] + temp1[35]; + temp0[33] = temp1[33] + temp1[34]; + temp0[34] = -temp1[34] + temp1[33]; + temp0[35] = -temp1[35] + temp1[32]; + temp0[36] = -temp1[36] + temp1[39]; + temp0[37] = -temp1[37] + temp1[38]; + temp0[38] = temp1[38] + temp1[37]; + temp0[39] = temp1[39] + temp1[36]; + temp0[40] = temp1[40] + temp1[43]; + temp0[41] = temp1[41] + temp1[42]; + temp0[42] = -temp1[42] + temp1[41]; + temp0[43] = -temp1[43] + temp1[40]; + temp0[44] = -temp1[44] + temp1[47]; + temp0[45] = -temp1[45] + temp1[46]; + temp0[46] = temp1[46] + temp1[45]; + temp0[47] = temp1[47] + temp1[44]; + temp0[48] = temp1[48] + temp1[51]; + temp0[49] = temp1[49] + temp1[50]; + temp0[50] = -temp1[50] + temp1[49]; + temp0[51] = -temp1[51] + temp1[48]; + temp0[52] = -temp1[52] + temp1[55]; + temp0[53] = -temp1[53] + temp1[54]; + temp0[54] = temp1[54] + temp1[53]; + temp0[55] = temp1[55] + temp1[52]; + temp0[56] = temp1[56] + temp1[59]; + temp0[57] = temp1[57] + temp1[58]; + temp0[58] = -temp1[58] + temp1[57]; + temp0[59] = -temp1[59] + temp1[56]; + temp0[60] = -temp1[60] + temp1[63]; + temp0[61] = -temp1[61] + temp1[62]; + temp0[62] = temp1[62] + temp1[61]; + temp0[63] = temp1[63] + temp1[60]; + + // stage 8 + temp1[0] = temp0[0]; + temp1[1] = temp0[1]; + temp1[2] = temp0[2]; + temp1[3] = temp0[3]; + temp1[4] = temp0[4]; + temp1[5] = temp0[5]; + temp1[6] = temp0[6]; + temp1[7] = temp0[7]; + temp1[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[8], cospi[4], temp0[15], cosBit); + temp1[9] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[9], cospi[36], temp0[14], cosBit); + temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[10], cospi[20], temp0[13], cosBit); + temp1[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[11], cospi[52], temp0[12], cosBit); + temp1[12] = 
Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[12], -cospi[52], temp0[11], cosBit); + temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[13], -cospi[20], temp0[10], cosBit); + temp1[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[14], -cospi[36], temp0[9], cosBit); + temp1[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[15], -cospi[4], temp0[8], cosBit); + temp1[16] = temp0[16] + temp0[17]; + temp1[17] = -temp0[17] + temp0[16]; + temp1[18] = -temp0[18] + temp0[19]; + temp1[19] = temp0[19] + temp0[18]; + temp1[20] = temp0[20] + temp0[21]; + temp1[21] = -temp0[21] + temp0[20]; + temp1[22] = -temp0[22] + temp0[23]; + temp1[23] = temp0[23] + temp0[22]; + temp1[24] = temp0[24] + temp0[25]; + temp1[25] = -temp0[25] + temp0[24]; + temp1[26] = -temp0[26] + temp0[27]; + temp1[27] = temp0[27] + temp0[26]; + temp1[28] = temp0[28] + temp0[29]; + temp1[29] = -temp0[29] + temp0[28]; + temp1[30] = -temp0[30] + temp0[31]; + temp1[31] = temp0[31] + temp0[30]; + temp1[32] = temp0[32]; + temp1[33] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[4], temp0[33], cospi[60], temp0[62], cosBit); + temp1[34] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[60], temp0[34], -cospi[4], temp0[61], cosBit); + temp1[35] = temp0[35]; + temp1[36] = temp0[36]; + temp1[37] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[36], temp0[37], cospi[28], temp0[58], cosBit); + temp1[38] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[28], temp0[38], -cospi[36], temp0[57], cosBit); + temp1[39] = temp0[39]; + temp1[40] = temp0[40]; + temp1[41] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[20], temp0[41], cospi[44], temp0[54], cosBit); + temp1[42] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[44], temp0[42], -cospi[20], temp0[53], cosBit); + temp1[43] = temp0[43]; + temp1[44] = temp0[44]; + temp1[45] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[52], temp0[45], cospi[12], temp0[50], cosBit); + temp1[46] = 
Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[12], temp0[46], -cospi[52], temp0[49], cosBit); + temp1[47] = temp0[47]; + temp1[48] = temp0[48]; + temp1[49] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[49], -cospi[52], temp0[46], cosBit); + temp1[50] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[52], temp0[50], cospi[12], temp0[45], cosBit); + temp1[51] = temp0[51]; + temp1[52] = temp0[52]; + temp1[53] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[53], -cospi[20], temp0[42], cosBit); + temp1[54] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[20], temp0[54], cospi[44], temp0[41], cosBit); + temp1[55] = temp0[55]; + temp1[56] = temp0[56]; + temp1[57] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[57], -cospi[36], temp0[38], cosBit); + temp1[58] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[36], temp0[58], cospi[28], temp0[37], cosBit); + temp1[59] = temp0[59]; + temp1[60] = temp0[60]; + temp1[61] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[61], -cospi[4], temp0[34], cosBit); + temp1[62] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[4], temp0[62], cospi[60], temp0[33], cosBit); + temp1[63] = temp0[63]; + + // stage 9 + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + temp0[2] = temp1[2]; + temp0[3] = temp1[3]; + temp0[4] = temp1[4]; + temp0[5] = temp1[5]; + temp0[6] = temp1[6]; + temp0[7] = temp1[7]; + temp0[8] = temp1[8]; + temp0[9] = temp1[9]; + temp0[10] = temp1[10]; + temp0[11] = temp1[11]; + temp0[12] = temp1[12]; + temp0[13] = temp1[13]; + temp0[14] = temp1[14]; + temp0[15] = temp1[15]; + temp0[16] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[62], temp1[16], cospi[2], temp1[31], cosBit); + temp0[17] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[30], temp1[17], cospi[34], temp1[30], cosBit); + temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[46], temp1[18], cospi[18], temp1[29], cosBit); + temp0[19] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[14], temp1[19], 
cospi[50], temp1[28], cosBit); + temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[54], temp1[20], cospi[10], temp1[27], cosBit); + temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[22], temp1[21], cospi[42], temp1[26], cosBit); + temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[38], temp1[22], cospi[26], temp1[25], cosBit); + temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[6], temp1[23], cospi[58], temp1[24], cosBit); + temp0[24] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[6], temp1[24], -cospi[58], temp1[23], cosBit); + temp0[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[38], temp1[25], -cospi[26], temp1[22], cosBit); + temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[22], temp1[26], -cospi[42], temp1[21], cosBit); + temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[54], temp1[27], -cospi[10], temp1[20], cosBit); + temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[14], temp1[28], -cospi[50], temp1[19], cosBit); + temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[46], temp1[29], -cospi[18], temp1[18], cosBit); + temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[30], temp1[30], -cospi[34], temp1[17], cosBit); + temp0[31] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[62], temp1[31], -cospi[2], temp1[16], cosBit); + temp0[32] = temp1[32] + temp1[33]; + temp0[33] = -temp1[33] + temp1[32]; + temp0[34] = -temp1[34] + temp1[35]; + temp0[35] = temp1[35] + temp1[34]; + temp0[36] = temp1[36] + temp1[37]; + temp0[37] = -temp1[37] + temp1[36]; + temp0[38] = -temp1[38] + temp1[39]; + temp0[39] = temp1[39] + temp1[38]; + temp0[40] = temp1[40] + temp1[41]; + temp0[41] = -temp1[41] + temp1[40]; + temp0[42] = -temp1[42] + temp1[43]; + temp0[43] = temp1[43] + temp1[42]; + temp0[44] = temp1[44] + temp1[45]; + temp0[45] = -temp1[45] + temp1[44]; + temp0[46] = -temp1[46] + temp1[47]; + temp0[47] = temp1[47] + temp1[46]; + temp0[48] = temp1[48] + temp1[49]; + temp0[49] = 
-temp1[49] + temp1[48]; + temp0[50] = -temp1[50] + temp1[51]; + temp0[51] = temp1[51] + temp1[50]; + temp0[52] = temp1[52] + temp1[53]; + temp0[53] = -temp1[53] + temp1[52]; + temp0[54] = -temp1[54] + temp1[55]; + temp0[55] = temp1[55] + temp1[54]; + temp0[56] = temp1[56] + temp1[57]; + temp0[57] = -temp1[57] + temp1[56]; + temp0[58] = -temp1[58] + temp1[59]; + temp0[59] = temp1[59] + temp1[58]; + temp0[60] = temp1[60] + temp1[61]; + temp0[61] = -temp1[61] + temp1[60]; + temp0[62] = -temp1[62] + temp1[63]; + temp0[63] = temp1[63] + temp1[62]; + + // stage 10 + temp1[0] = temp0[0]; + temp1[1] = temp0[1]; + temp1[2] = temp0[2]; + temp1[3] = temp0[3]; + temp1[4] = temp0[4]; + temp1[5] = temp0[5]; + temp1[6] = temp0[6]; + temp1[7] = temp0[7]; + temp1[8] = temp0[8]; + temp1[9] = temp0[9]; + temp1[10] = temp0[10]; + temp1[11] = temp0[11]; + temp1[12] = temp0[12]; + temp1[13] = temp0[13]; + temp1[14] = temp0[14]; + temp1[15] = temp0[15]; + temp1[16] = temp0[16]; + temp1[17] = temp0[17]; + temp1[18] = temp0[18]; + temp1[19] = temp0[19]; + temp1[20] = temp0[20]; + temp1[21] = temp0[21]; + temp1[22] = temp0[22]; + temp1[23] = temp0[23]; + temp1[24] = temp0[24]; + temp1[25] = temp0[25]; + temp1[26] = temp0[26]; + temp1[27] = temp0[27]; + temp1[28] = temp0[28]; + temp1[29] = temp0[29]; + temp1[30] = temp0[30]; + temp1[31] = temp0[31]; + temp1[32] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[63], temp0[32], cospi[1], temp0[63], cosBit); + temp1[33] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[31], temp0[33], cospi[33], temp0[62], cosBit); + temp1[34] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[47], temp0[34], cospi[17], temp0[61], cosBit); + temp1[35] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[15], temp0[35], cospi[49], temp0[60], cosBit); + temp1[36] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[55], temp0[36], cospi[9], temp0[59], cosBit); + temp1[37] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[23], temp0[37], cospi[41], temp0[58], cosBit); + 
temp1[38] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[39], temp0[38], cospi[25], temp0[57], cosBit); + temp1[39] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[7], temp0[39], cospi[57], temp0[56], cosBit); + temp1[40] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[59], temp0[40], cospi[5], temp0[55], cosBit); + temp1[41] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[27], temp0[41], cospi[37], temp0[54], cosBit); + temp1[42] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[43], temp0[42], cospi[21], temp0[53], cosBit); + temp1[43] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[11], temp0[43], cospi[53], temp0[52], cosBit); + temp1[44] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[51], temp0[44], cospi[13], temp0[51], cosBit); + temp1[45] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[19], temp0[45], cospi[45], temp0[50], cosBit); + temp1[46] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[35], temp0[46], cospi[29], temp0[49], cosBit); + temp1[47] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[3], temp0[47], cospi[61], temp0[48], cosBit); + temp1[48] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[3], temp0[48], -cospi[61], temp0[47], cosBit); + temp1[49] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[35], temp0[49], -cospi[29], temp0[46], cosBit); + temp1[50] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[19], temp0[50], -cospi[45], temp0[45], cosBit); + temp1[51] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[51], temp0[51], -cospi[13], temp0[44], cosBit); + temp1[52] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[11], temp0[52], -cospi[53], temp0[43], cosBit); + temp1[53] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[43], temp0[53], -cospi[21], temp0[42], cosBit); + temp1[54] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[27], temp0[54], -cospi[37], temp0[41], cosBit); + temp1[55] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[59], temp0[55], -cospi[5], temp0[40], cosBit); + temp1[56] = 
Av1Dct4Forward1dTransformer.HalfButterfly(cospi[7], temp0[56], -cospi[57], temp0[39], cosBit); + temp1[57] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[39], temp0[57], -cospi[25], temp0[38], cosBit); + temp1[58] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[23], temp0[58], -cospi[41], temp0[37], cosBit); + temp1[59] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[55], temp0[59], -cospi[9], temp0[36], cosBit); + temp1[60] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[15], temp0[60], -cospi[49], temp0[35], cosBit); + temp1[61] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[47], temp0[61], -cospi[17], temp0[34], cosBit); + temp1[62] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[31], temp0[62], -cospi[33], temp0[33], cosBit); + temp1[63] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[63], temp0[63], -cospi[1], temp0[32], cosBit); + + // stage 11 + Unsafe.Add(ref output, 0) = temp1[0]; + Unsafe.Add(ref output, 1) = temp1[32]; + Unsafe.Add(ref output, 2) = temp1[16]; + Unsafe.Add(ref output, 3) = temp1[48]; + Unsafe.Add(ref output, 4) = temp1[8]; + Unsafe.Add(ref output, 5) = temp1[40]; + Unsafe.Add(ref output, 6) = temp1[24]; + Unsafe.Add(ref output, 7) = temp1[56]; + Unsafe.Add(ref output, 8) = temp1[4]; + Unsafe.Add(ref output, 9) = temp1[36]; + Unsafe.Add(ref output, 10) = temp1[20]; + Unsafe.Add(ref output, 11) = temp1[52]; + Unsafe.Add(ref output, 12) = temp1[12]; + Unsafe.Add(ref output, 13) = temp1[44]; + Unsafe.Add(ref output, 14) = temp1[28]; + Unsafe.Add(ref output, 15) = temp1[60]; + Unsafe.Add(ref output, 16) = temp1[2]; + Unsafe.Add(ref output, 17) = temp1[34]; + Unsafe.Add(ref output, 18) = temp1[18]; + Unsafe.Add(ref output, 19) = temp1[50]; + Unsafe.Add(ref output, 20) = temp1[10]; + Unsafe.Add(ref output, 21) = temp1[42]; + Unsafe.Add(ref output, 22) = temp1[26]; + Unsafe.Add(ref output, 23) = temp1[58]; + Unsafe.Add(ref output, 24) = temp1[6]; + Unsafe.Add(ref output, 25) = temp1[38]; + Unsafe.Add(ref output, 26) = temp1[22]; + 
Unsafe.Add(ref output, 27) = temp1[54]; + Unsafe.Add(ref output, 28) = temp1[14]; + Unsafe.Add(ref output, 29) = temp1[46]; + Unsafe.Add(ref output, 30) = temp1[30]; + Unsafe.Add(ref output, 31) = temp1[62]; + Unsafe.Add(ref output, 32) = temp1[1]; + Unsafe.Add(ref output, 33) = temp1[33]; + Unsafe.Add(ref output, 34) = temp1[17]; + Unsafe.Add(ref output, 35) = temp1[49]; + Unsafe.Add(ref output, 36) = temp1[9]; + Unsafe.Add(ref output, 37) = temp1[41]; + Unsafe.Add(ref output, 38) = temp1[25]; + Unsafe.Add(ref output, 39) = temp1[57]; + Unsafe.Add(ref output, 40) = temp1[5]; + Unsafe.Add(ref output, 41) = temp1[37]; + Unsafe.Add(ref output, 42) = temp1[21]; + Unsafe.Add(ref output, 43) = temp1[53]; + Unsafe.Add(ref output, 44) = temp1[13]; + Unsafe.Add(ref output, 45) = temp1[45]; + Unsafe.Add(ref output, 46) = temp1[29]; + Unsafe.Add(ref output, 47) = temp1[61]; + Unsafe.Add(ref output, 48) = temp1[3]; + Unsafe.Add(ref output, 49) = temp1[35]; + Unsafe.Add(ref output, 50) = temp1[19]; + Unsafe.Add(ref output, 51) = temp1[51]; + Unsafe.Add(ref output, 52) = temp1[11]; + Unsafe.Add(ref output, 53) = temp1[43]; + Unsafe.Add(ref output, 54) = temp1[27]; + Unsafe.Add(ref output, 55) = temp1[59]; + Unsafe.Add(ref output, 56) = temp1[7]; + Unsafe.Add(ref output, 57) = temp1[39]; + Unsafe.Add(ref output, 58) = temp1[23]; + Unsafe.Add(ref output, 59) = temp1[55]; + Unsafe.Add(ref output, 60) = temp1[15]; + Unsafe.Add(ref output, 61) = temp1[47]; + Unsafe.Add(ref output, 62) = temp1[31]; + Unsafe.Add(ref output, 63) = temp1[63]; + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct8Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct8Forward1dTransformer.cs index 1a6d864632..e8e449ac45 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct8Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct8Forward1dTransformer.cs @@ -1,10 +1,71 @@ // Copyright (c) Six Labors. 
// Licensed under the Six Labors Split License. +using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Dct8Forward1dTransformer : IAv1Forward1dTransformer { public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + => TransformScalar(ref input, ref output, cosBit); + + private static void TransformScalar(ref int input, ref int output, int cosBit) + { + Span temp0 = stackalloc int[8]; + Span temp1 = stackalloc int[8]; + + // stage 0; + + // stage 1; + temp0[0] = Unsafe.Add(ref input, 0) + Unsafe.Add(ref input, 7); + temp0[1] = Unsafe.Add(ref input, 1) + Unsafe.Add(ref input, 6); + temp0[2] = Unsafe.Add(ref input, 2) + Unsafe.Add(ref input, 5); + temp0[3] = Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 4); + temp0[4] = -Unsafe.Add(ref input, 4) + Unsafe.Add(ref input, 3); + temp0[5] = -Unsafe.Add(ref input, 5) + Unsafe.Add(ref input, 2); + temp0[6] = -Unsafe.Add(ref input, 6) + Unsafe.Add(ref input, 1); + temp0[7] = -Unsafe.Add(ref input, 7) + Unsafe.Add(ref input, 0); + + // stage 2 + Span cospi = Av1SinusConstants.CosinusPi(cosBit); + temp1[0] = temp0[0] + temp0[3]; + temp1[1] = temp0[1] + temp0[2]; + temp1[2] = -temp0[2] + temp0[1]; + temp1[3] = -temp0[3] + temp0[0]; + temp1[4] = temp0[4]; + temp1[5] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[5], cospi[32], temp0[6], cosBit); + temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[6], cospi[32], temp0[5], cosBit); + temp1[7] = temp0[7]; + + // stage 3 + temp0[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[0], cospi[32], temp1[1], cosBit); + temp0[1] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[1], cospi[32], temp1[0], cosBit); + temp0[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[2], cospi[16], temp1[3], cosBit); + temp0[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[3], 
-cospi[16], temp1[2], cosBit); + temp0[4] = temp1[4] + temp1[5]; + temp0[5] = -temp1[5] + temp1[4]; + temp0[6] = -temp1[6] + temp1[7]; + temp0[7] = temp1[7] + temp1[6]; + + // stage 4 + temp1[0] = temp0[0]; + temp1[1] = temp0[1]; + temp1[2] = temp0[2]; + temp1[3] = temp0[3]; + temp1[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[4], cospi[8], temp0[7], cosBit); + temp1[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[5], cospi[40], temp0[6], cosBit); + temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[6], -cospi[40], temp0[5], cosBit); + temp1[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[7], -cospi[8], temp0[4], cosBit); + + // stage 5 + Unsafe.Add(ref output, 0) = temp1[0]; + Unsafe.Add(ref output, 1) = temp1[4]; + Unsafe.Add(ref output, 2) = temp1[2]; + Unsafe.Add(ref output, 3) = temp1[6]; + Unsafe.Add(ref output, 4) = temp1[1]; + Unsafe.Add(ref output, 5) = temp1[5]; + Unsafe.Add(ref output, 6) = temp1[3]; + Unsafe.Add(ref output, 7) = temp1[7]; + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Forward2dTransformerBase.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Forward2dTransformerBase.cs index 1619301a3b..cb8e98fc70 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Forward2dTransformerBase.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Forward2dTransformerBase.cs @@ -7,8 +7,8 @@ namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal abstract class Av1Forward2dTransformerBase { - private const int NewSqrt = 5793; - private const int NewSqrtBitCount = 12; + internal const int NewSqrt2 = 5793; + internal const int NewSqrt2BitCount = 12; /// /// SVT: av1_tranform_two_d_core_c @@ -108,7 +108,7 @@ internal abstract class Av1Forward2dTransformerBase for (c = 0; c < transformColumnCount; ++c) { ref int current = ref Unsafe.Add(ref output, (r * transformColumnCount) + c); - current = 
Av1Math.RoundShift((long)current * NewSqrt, NewSqrtBitCount); + current = Av1Math.RoundShift((long)current * NewSqrt2, NewSqrt2BitCount); } } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity16Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity16Forward1dTransformer.cs index d26f2f7865..3dc8be853f 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity16Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity16Forward1dTransformer.cs @@ -1,10 +1,34 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Identity16Forward1dTransformer : IAv1Forward1dTransformer { + private const int TwiceNewSqrt2 = 2 * 5793; + public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + => TransformScalar(ref input, ref output); + + private static void TransformScalar(ref int input, ref int output) + { + output = Av1Math.RoundShift((long)input * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 1) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 1) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 2) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 2) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 3) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 3) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 4) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 4) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 5) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 5) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 6) = 
Av1Math.RoundShift((long)Unsafe.Add(ref input, 6) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 7) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 7) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 8) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 8) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 9) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 9) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 10) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 10) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 11) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 11) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 12) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 12) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 13) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 13) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 14) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 14) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 15) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 15) * TwiceNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity32Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity32Forward1dTransformer.cs index e6232664f5..34985e9a9b 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity32Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity32Forward1dTransformer.cs @@ -1,10 +1,29 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. 
+using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Identity32Forward1dTransformer : IAv1Forward1dTransformer { public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + { + TransformScalar(ref input, ref output); + TransformScalar(ref Unsafe.Add(ref input, 8), ref Unsafe.Add(ref output, 8)); + TransformScalar(ref Unsafe.Add(ref input, 16), ref Unsafe.Add(ref output, 16)); + TransformScalar(ref Unsafe.Add(ref input, 24), ref Unsafe.Add(ref output, 24)); + } + + private static void TransformScalar(ref int input, ref int output) + { + output = input << 2; + Unsafe.Add(ref output, 1) = Unsafe.Add(ref input, 1) << 2; + Unsafe.Add(ref output, 2) = Unsafe.Add(ref input, 2) << 2; + Unsafe.Add(ref output, 3) = Unsafe.Add(ref input, 3) << 2; + Unsafe.Add(ref output, 4) = Unsafe.Add(ref input, 4) << 2; + Unsafe.Add(ref output, 5) = Unsafe.Add(ref input, 5) << 2; + Unsafe.Add(ref output, 6) = Unsafe.Add(ref input, 6) << 2; + Unsafe.Add(ref output, 7) = Unsafe.Add(ref input, 7) << 2; + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity4Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity4Forward1dTransformer.cs index a478054832..baa622b0a8 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity4Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity4Forward1dTransformer.cs @@ -1,10 +1,20 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. 
+using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Identity4Forward1dTransformer : IAv1Forward1dTransformer { public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + => TransformScalar(ref input, ref output); + + private static void TransformScalar(ref int input, ref int output) + { + output = Av1Math.RoundShift((long)input * Av1Forward2dTransformerBase.NewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 1) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 1) * Av1Forward2dTransformerBase.NewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 2) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 2) * Av1Forward2dTransformerBase.NewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 3) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 3) * Av1Forward2dTransformerBase.NewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity64Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity64Forward1dTransformer.cs index 4910896fc5..2c3a351813 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity64Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity64Forward1dTransformer.cs @@ -1,10 +1,39 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. 
+using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Identity64Forward1dTransformer : IAv1Forward1dTransformer { + private const int QuadNewSqrt2 = 4 * 5793; + public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + { + TransformScalar(ref input, ref output); + TransformScalar(ref Unsafe.Add(ref input, 16), ref Unsafe.Add(ref output, 16)); + TransformScalar(ref Unsafe.Add(ref input, 32), ref Unsafe.Add(ref output, 32)); + TransformScalar(ref Unsafe.Add(ref input, 48), ref Unsafe.Add(ref output, 48)); + } + + private static void TransformScalar(ref int input, ref int output) + { + output = Av1Math.RoundShift((long)input * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 1) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 1) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 2) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 2) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 3) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 3) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 4) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 4) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 5) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 5) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 6) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 6) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 7) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 7) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 8) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 8) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref 
output, 9) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 9) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 10) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 10) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 11) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 11) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 12) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 12) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 13) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 13) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 14) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 14) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + Unsafe.Add(ref output, 15) = Av1Math.RoundShift((long)Unsafe.Add(ref input, 15) * QuadNewSqrt2, Av1Forward2dTransformerBase.NewSqrt2BitCount); + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity8Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity8Forward1dTransformer.cs index 497663d032..88c3585c0d 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity8Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity8Forward1dTransformer.cs @@ -1,10 +1,24 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. 
+using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; internal class Av1Identity8Forward1dTransformer : IAv1Forward1dTransformer { public void Transform(ref int input, ref int output, int cosBit, Span stageRange) - => throw new NotImplementedException(); + => TransformScalar(ref input, ref output); + + private static void TransformScalar(ref int input, ref int output) + { + output = input << 1; + Unsafe.Add(ref output, 1) = Unsafe.Add(ref input, 1) << 1; + Unsafe.Add(ref output, 2) = Unsafe.Add(ref input, 2) << 1; + Unsafe.Add(ref output, 3) = Unsafe.Add(ref input, 3) << 1; + Unsafe.Add(ref output, 4) = Unsafe.Add(ref input, 4) << 1; + Unsafe.Add(ref output, 5) = Unsafe.Add(ref input, 5) << 1; + Unsafe.Add(ref output, 6) = Unsafe.Add(ref input, 6) << 1; + Unsafe.Add(ref output, 7) = Unsafe.Add(ref input, 7) << 1; + } } diff --git a/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ForwardTransformTests.cs b/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ForwardTransformTests.cs index 093a6b1555..c9674cd442 100644 --- a/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ForwardTransformTests.cs +++ b/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ForwardTransformTests.cs @@ -13,15 +13,15 @@ namespace SixLabors.ImageSharp.Tests.Formats.Heif.Av1; [Trait("Format", "Avif")] public class Av1ForwardTransformTests { - private static readonly double[] MaximumAllowedError = + private static readonly int[] MaximumAllowedError = [ 3, // 4x4 transform 5, // 8x8 transform 11, // 16x16 transform 70, // 32x32 transform 64, // 64x64 transform - 3.9, // 4x8 transform - 4.3, // 8x4 transform + 4, // 4x8 transform + 5, // 8x4 transform 12, // 8x16 transform 12, // 16x8 transform 32, // 16x32 transform @@ -36,45 +36,61 @@ public class Av1ForwardTransformTests 36, // 64x16 transform ]; - [Theory] - [MemberData(nameof(GetSizes))] - public void AccuracyDct1dTest(int txSize) - { - Random rnd = new(0); - const int testBlockCount = 1; // Originally set to: 
1000 - Av1TransformSize transformSize = (Av1TransformSize)txSize; - Av1Transform2dFlipConfiguration config = new(Av1TransformType.DctDct, transformSize); - int width = config.TransformSize.GetWidth(); + [Fact] + public void AccuracyOfDct1dTransformSize4Test() + => AssertAccuracy1d(Av1TransformSize.Size4x4, Av1TransformType.DctDct, new Av1Dct4Forward1dTransformer()); - int[] inputOfTest = new int[width]; - double[] inputReference = new double[width]; - int[] outputOfTest = new int[width]; - double[] outputReference = new double[width]; - for (int ti = 0; ti < testBlockCount; ++ti) - { - // prepare random test data - for (int ni = 0; ni < width; ++ni) - { - inputOfTest[ni] = (short)rnd.Next((1 << 10) - 1); - inputReference[ni] = inputOfTest[ni]; - outputReference[ni] = 0; - outputOfTest[ni] = 255; - } + [Fact] + public void AccuracyOfDct1dTransformSize8Test() + => AssertAccuracy1d(Av1TransformSize.Size8x8, Av1TransformType.DctDct, new Av1Dct8Forward1dTransformer(), 2); - // calculate in forward transform functions - new Av1Dct4Forward1dTransformer().Transform( - ref inputOfTest[0], - ref outputOfTest[0], - config.CosBitColumn, - config.StageRangeColumn); + [Fact] + public void AccuracyOfDct1dTransformSize16Test() + => AssertAccuracy1d(Av1TransformSize.Size16x16, Av1TransformType.DctDct, new Av1Dct16Forward1dTransformer(), 3); - // calculate in reference forward transform functions - Av1ReferenceTransform.ReferenceDct1d(inputReference, outputReference, width); + [Fact] + public void AccuracyOfDct1dTransformSize32Test() + => AssertAccuracy1d(Av1TransformSize.Size32x32, Av1TransformType.DctDct, new Av1Dct32Forward1dTransformer(), 4); - // Assert - Assert.True(CompareWithError(outputReference, outputOfTest, 1)); - } - } + [Fact] + public void AccuracyOfDct1dTransformSize64Test() + => AssertAccuracy1d(Av1TransformSize.Size64x64, Av1TransformType.DctDct, new Av1Dct64Forward1dTransformer(), 5); + + [Fact] + public void AccuracyOfAdst1dTransformSize4Test() + => 
AssertAccuracy1d(Av1TransformSize.Size4x4, Av1TransformType.AdstAdst, new Av1Adst4Forward1dTransformer()); + + [Fact] + public void AccuracyOfAdst1dTransformSize8Test() + => AssertAccuracy1d(Av1TransformSize.Size8x8, Av1TransformType.AdstAdst, new Av1Adst8Forward1dTransformer(), 2); + + [Fact] + public void AccuracyOfAdst1dTransformSize16Test() + => AssertAccuracy1d(Av1TransformSize.Size16x16, Av1TransformType.AdstAdst, new Av1Adst16Forward1dTransformer(), 3); + + [Fact] + public void AccuracyOfAdst1dTransformSize32Test() + => AssertAccuracy1d(Av1TransformSize.Size32x32, Av1TransformType.AdstAdst, new Av1Adst32Forward1dTransformer(), 4); + + [Fact] + public void AccuracyOfIdentity1dTransformSize4Test() + => AssertAccuracy1d(Av1TransformSize.Size4x4, Av1TransformType.Identity, new Av1Identity4Forward1dTransformer()); + + [Fact] + public void AccuracyOfIdentity1dTransformSize8Test() + => AssertAccuracy1d(Av1TransformSize.Size8x8, Av1TransformType.Identity, new Av1Identity8Forward1dTransformer()); + + [Fact] + public void AccuracyOfIdentity1dTransformSize16Test() + => AssertAccuracy1d(Av1TransformSize.Size16x16, Av1TransformType.Identity, new Av1Identity16Forward1dTransformer()); + + [Fact] + public void AccuracyOfIdentity1dTransformSize32Test() + => AssertAccuracy1d(Av1TransformSize.Size32x32, Av1TransformType.Identity, new Av1Identity32Forward1dTransformer()); + + [Fact] + public void AccuracyOfIdentity1dTransformSize64Test() + => AssertAccuracy1d(Av1TransformSize.Size64x64, Av1TransformType.Identity, new Av1Identity64Forward1dTransformer()); [Theory] [MemberData(nameof(GetCombinations))] @@ -212,10 +228,51 @@ public class Av1ForwardTransformTests } } + private static void AssertAccuracy1d( + Av1TransformSize transformSize, + Av1TransformType transformType, + IAv1Forward1dTransformer transformerUnderTest, + int allowedError = 1) + { + Random rnd = new(0); + const int testBlockCount = 1; // Originally set to: 1000 + Av1Transform2dFlipConfiguration config = 
new(transformType, transformSize); + int width = config.TransformSize.GetWidth(); + + int[] inputOfTest = new int[width]; + double[] inputReference = new double[width]; + int[] outputOfTest = new int[width]; + double[] outputReference = new double[width]; + for (int ti = 0; ti < testBlockCount; ++ti) + { + // prepare random test data + for (int ni = 0; ni < width; ++ni) + { + inputOfTest[ni] = (short)rnd.Next((1 << 10) - 1); + inputReference[ni] = inputOfTest[ni]; + outputReference[ni] = 0; + outputOfTest[ni] = 255; + } + + // calculate in forward transform functions + transformerUnderTest.Transform( + ref inputOfTest[0], + ref outputOfTest[0], + config.CosBitColumn, + config.StageRangeColumn); + + // calculate in reference forward transform functions + Av1ReferenceTransform.ReferenceTransform1d(config.TransformTypeColumn, inputReference, outputReference, width); + + // Assert + Assert.True(CompareWithError(outputReference, outputOfTest, allowedError)); + } + } + private static bool CompareWithError(Span expected, Span actual, double allowedError) { // compare for the result is witghin accuracy - double maximumErrorInTest = 0; + double maximumErrorInTest = 0d; for (int ni = 0; ni < expected.Length; ++ni) { maximumErrorInTest = Math.Max(maximumErrorInTest, Math.Abs(actual[ni] - Math.Round(expected[ni]))); @@ -224,21 +281,12 @@ public class Av1ForwardTransformTests return maximumErrorInTest <= allowedError; } - public static TheoryData GetSizes() - { - TheoryData sizes = []; - - // For now test only 4x4. 
- sizes.Add(0); - return sizes; - } - public static TheoryData GetCombinations() { TheoryData combinations = []; for (int s = 0; s < (int)Av1TransformSize.AllSizes; s++) { - double maxError = MaximumAllowedError[s]; + int maxError = MaximumAllowedError[s]; for (int t = 0; t < (int)Av1TransformType.AllTransformTypes; t++) { Av1TransformType transformType = (Av1TransformType)t; @@ -246,7 +294,7 @@ public class Av1ForwardTransformTests Av1Transform2dFlipConfiguration config = new(transformType, transformSize); if (config.IsAllowed()) { - combinations.Add(s, t, (int)maxError); + combinations.Add(s, t, maxError); } // For now only DCT. diff --git a/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ReferenceTransform.cs b/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ReferenceTransform.cs index 5cb91ca44d..a4d5c105b0 100644 --- a/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ReferenceTransform.cs +++ b/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ReferenceTransform.cs @@ -174,7 +174,7 @@ internal class Av1ReferenceTransform } } - internal static void ReferenceDct1d(Span input, Span output, int size) + private static void ReferenceDct1d(Span input, Span output, int size) { const double kInvSqrt2 = 0.707106781186547524400844362104f; for (int k = 0; k < size; ++k) @@ -223,7 +223,7 @@ internal class Av1ReferenceTransform } } - private static void ReferenceTransform1d(Av1TransformType1d type, Span input, Span output, int size) + internal static void ReferenceTransform1d(Av1TransformType1d type, Span input, Span output, int size) { switch (type) {