Browse Source

Implement all 1-dimensional forward transforms

pull/2633/head
Ynse Hoornenborg 2 years ago
parent
commit
f1908c8620
  1. 62
      src/ImageSharp/Formats/Heif/Av1/Transform/Av1ForwardTransformer.cs
  2. 12
      src/ImageSharp/Formats/Heif/Av1/Transform/Av1Transform2dFlipConfiguration.cs
  3. 176
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst16Forward1dTransformer.cs
  4. 387
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst32Forward1dTransformer.cs
  5. 64
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst4Forward1dTransformer.cs
  6. 84
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst8Forward1dTransformer.cs
  7. 139
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct16Forward1dTransformer.cs
  8. 319
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct32Forward1dTransformer.cs
  9. 10
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct4Forward1dTransformer.cs
  10. 739
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct64Forward1dTransformer.cs
  11. 63
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct8Forward1dTransformer.cs
  12. 6
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Forward2dTransformerBase.cs
  13. 26
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity16Forward1dTransformer.cs
  14. 21
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity32Forward1dTransformer.cs
  15. 12
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity4Forward1dTransformer.cs
  16. 31
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity64Forward1dTransformer.cs
  17. 16
      src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity8Forward1dTransformer.cs
  18. 148
      tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ForwardTransformTests.cs
  19. 4
      tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ReferenceTransform.cs

62
src/ImageSharp/Formats/Heif/Av1/Transform/Av1ForwardTransformer.cs

@ -31,7 +31,7 @@ internal class Av1ForwardTransformer
null
];
private static readonly int[] TemporaryCoefficientsBuffer = new int[64 * 64];
private static readonly int[] TemporaryCoefficientsBuffer = new int[Av1Constants.MaxTransformSize * Av1Constants.MaxTransformSize];
internal static void Transform2d(Span<short> input, Span<int> coefficients, uint stride, Av1TransformType transformType, Av1TransformSize transformSize, int bitDepth)
{
@ -49,54 +49,6 @@ internal class Av1ForwardTransformer
}
}
/// <summary>
/// Entry point for the AVX2 flavoured 2-dimensional forward transform.
/// Only 8x8 blocks are routed to the dedicated AVX2 routine; every other size is
/// forwarded unchanged to the scalar <see cref="Transform2d"/> implementation.
/// </summary>
/// <param name="input">The samples to transform.</param>
/// <param name="coefficients">The destination for the transform coefficients.</param>
/// <param name="stride">The stride of <paramref name="input"/>.</param>
/// <param name="transformType">The 2-dimensional transform type to apply.</param>
/// <param name="transformSize">The size of the transform block.</param>
/// <param name="bitDepth">The bit depth, passed through to the selected implementation.</param>
internal static void Transform2dAvx2(Span<short> input, Span<int> coefficients, uint stride, Av1TransformType transformType, Av1TransformSize transformSize, int bitDepth)
{
    if (transformSize == Av1TransformSize.Size8x8)
    {
        Transform8x8Avx2(input, coefficients, stride, transformType, bitDepth);
        return;
    }

    // All remaining sizes use the scalar code path. This includes 4x4, which is
    // too small to benefit from intrinsics.
    Transform2d(input, coefficients, stride, transformType, transformSize, bitDepth);
}
/// <summary>
/// Placeholder for the AVX2 implementation of the 8x8 forward transform.
/// SVT: svt_av1_fwd_txfm2d_8x8_avx2
/// </summary>
/// <remarks>
/// Not implemented yet: after setting up its locals, every branch of the switch throws
/// <see cref="NotImplementedException"/>. The commented pseudo code sketches the intended
/// DCT/DCT pipeline.
/// </remarks>
/// <param name="input">The samples to transform (only referenced by the pseudo code for now).</param>
/// <param name="coefficients">The destination for the coefficients (only referenced by the pseudo code for now).</param>
/// <param name="stride">The stride of <paramref name="input"/> (only referenced by the pseudo code for now).</param>
/// <param name="transformType">The transform type; used to build the configuration and to select the switch branch.</param>
/// <param name="bitDepth">The bit depth; currently unused in this method.</param>
/// <exception cref="NotImplementedException">Always thrown by the current implementation.</exception>
private static void Transform8x8Avx2(Span<short> input, Span<int> coefficients, uint stride, Av1TransformType transformType, int bitDepth)
{
// Configuration and scratch vectors are prepared for the future implementation;
// nothing below consumes them yet except the commented pseudo code.
Av1Transform2dFlipConfiguration config = new(transformType, Av1TransformSize.Size8x8);
Span<int> shift = config.Shift;
Span<Vector256<int>> inVector = stackalloc Vector256<int>[8];
Span<Vector256<int>> outVector = stackalloc Vector256<int>[8];
ref Vector256<int> inRef = ref inVector[0];
ref Vector256<int> outRef = ref outVector[0];
switch (transformType)
{
case Av1TransformType.DctDct:
/* Pseudo code
Av1Dct8ForwardTransformer dct8 = new();
LoadBuffer8x8(ref input[0], ref inRef, stride, 0, 0, shift[0]);
dct8.TransformAvx2(ref inRef, ref outRef, config.CosBitColumn, 1);
Column8x8Rounding(ref outRef, -shift[1]);
Transpose8x8Avx2(ref outRef, ref inRef);
dct8.TransformAvx2(ref inRef, ref outRef, config.CosBitRow, 1);
Transpose8x8Avx2(ref outRef, ref inRef);
WriteBuffer8x8(ref inRef, ref coefficients[0]);
break;
*/
throw new NotImplementedException();
default:
// No other transform type has an AVX2 path either.
throw new NotImplementedException();
}
}
/// <summary>
/// Looks up the 1-dimensional forward transformer stored for the given function type.
/// </summary>
/// <param name="transformerType">The function type; its integer value is used as the index into <c>Transformers</c>.</param>
/// <returns>The entry stored at that index, which the nullable return type allows to be <see langword="null"/>.</returns>
private static IAv1Forward1dTransformer? GetTransformer(Av1TransformFunctionType transformerType)
{
    int index = (int)transformerType;
    return Transformers[index];
}
@ -155,7 +107,7 @@ internal class Av1ForwardTransformer
uint t = (uint)(c + ((transformRowCount - 1) * (int)inputStride));
for (r = 0; r < transformRowCount; ++r)
{
// flip upside down
// Flip upside down
Unsafe.Add(ref tempIn, r) = Unsafe.Add(ref input, t);
t -= inputStride;
}
@ -188,17 +140,23 @@ internal class Av1ForwardTransformer
// Rows
for (r = 0; r < transformRowCount; ++r)
{
transformFunctionRow.Transform(ref Unsafe.Add(ref buf, r * transformColumnCount), ref Unsafe.Add(ref output, r * transformColumnCount), cosBitRow, stageRangeRow);
transformFunctionRow.Transform(
ref Unsafe.Add(ref buf, r * transformColumnCount),
ref Unsafe.Add(ref output, r * transformColumnCount),
cosBitRow,
stageRangeRow);
RoundShiftArray(ref Unsafe.Add(ref output, r * transformColumnCount), transformColumnCount, -shift[2]);
if (Math.Abs(rectangleType) == 1)
{
// Multiply everything by Sqrt2 if the transform is rectangular and the
// size difference is a factor of 2.
int t = r * transformColumnCount;
for (c = 0; c < transformColumnCount; ++c)
{
ref int current = ref Unsafe.Add(ref output, (r * transformColumnCount) + c);
ref int current = ref Unsafe.Add(ref output, t);
current = Av1Math.RoundShift((long)current * NewSqrt, NewSqrtBitCount);
t++;
}
}
}

12
src/ImageSharp/Formats/Heif/Av1/Transform/Av1Transform2dFlipConfiguration.cs

@ -131,15 +131,15 @@ internal class Av1Transform2dFlipConfiguration
this.TransformSize = transformSize;
this.TransformType = transformType;
this.SetFlip(transformType);
Av1TransformType1d tx_type_1d_col = VerticalType[(int)transformType];
Av1TransformType1d tx_type_1d_row = HorizontalType[(int)transformType];
this.TransformTypeColumn = VerticalType[(int)transformType];
this.TransformTypeRow = HorizontalType[(int)transformType];
int txw_idx = transformSize.GetBlockWidthLog2() - SmallestTransformSizeLog2;
int txh_idx = transformSize.GetBlockHeightLog2() - SmallestTransformSizeLog2;
this.shift = ShiftMap[(int)transformSize];
this.CosBitColumn = CosBitColumnMap[txw_idx][txh_idx];
this.CosBitRow = CosBitRowMap[txw_idx][txh_idx];
this.TransformFunctionTypeColumn = TransformFunctionTypeMap[txh_idx][(int)tx_type_1d_col];
this.TransformFunctionTypeRow = TransformFunctionTypeMap[txw_idx][(int)tx_type_1d_row];
this.TransformFunctionTypeColumn = TransformFunctionTypeMap[txh_idx][(int)this.TransformTypeColumn];
this.TransformFunctionTypeRow = TransformFunctionTypeMap[txw_idx][(int)this.TransformTypeRow];
this.StageNumberColumn = StageNumberList[(int)this.TransformFunctionTypeColumn];
this.StageNumberRow = StageNumberList[(int)this.TransformFunctionTypeRow];
this.StageRangeColumn = new byte[12];
@ -151,6 +151,10 @@ internal class Av1Transform2dFlipConfiguration
public int CosBitRow { get; }
public Av1TransformType1d TransformTypeColumn { get; }
public Av1TransformType1d TransformTypeRow { get; }
public Av1TransformFunctionType TransformFunctionTypeColumn { get; }
public Av1TransformFunctionType TransformFunctionTypeRow { get; }

176
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst16Forward1dTransformer.cs

@ -1,10 +1,184 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
internal class Av1Adst16Forward1dTransformer : IAv1Forward1dTransformer
{
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
=> throw new NotImplementedException();
=> TransformScalar(ref input, ref output, cosBit);
/// <summary>
/// Scalar implementation of the 16-point forward ADST.
/// Reads 16 consecutive values starting at <paramref name="input"/> and writes
/// 16 consecutive values starting at <paramref name="output"/>.
/// </summary>
/// <remarks>
/// The transform runs in stages that ping-pong between two 16-element scratch buffers:
/// stage 1 permutes (and partially negates) the input, the even stages apply
/// <c>HalfButterfly</c> rotations using the cosine table selected by <paramref name="cosBit"/>,
/// the odd stages apply add/subtract butterflies, and stage 9 permutes the result into the output.
/// </remarks>
/// <param name="input">Reference to the first of the 16 input values.</param>
/// <param name="output">Reference to the first of the 16 output values; must not refer to the same location as <paramref name="input"/>.</param>
/// <param name="cosBit">Selects the cosine table and is passed to every <c>HalfButterfly</c> call.</param>
private static void TransformScalar(ref int input, ref int output, int cosBit)
{
    // Compare the references, not the values they point at: 'output == input' on two
    // 'ref int' arguments compares the first elements, which rejects valid calls whose
    // first values happen to be equal and fails to detect genuinely aliased buffers.
    Guard.IsFalse(Unsafe.AreSame(ref input, ref output), nameof(output), "Cannot operate on same buffer for input and output.");

    Span<int> temp0 = stackalloc int[16];
    Span<int> temp1 = stackalloc int[16];

    // stage 0;

    // stage 1: input permutation with sign changes.
    temp1[0] = input;
    temp1[1] = -Unsafe.Add(ref input, 15);
    temp1[2] = -Unsafe.Add(ref input, 7);
    temp1[3] = Unsafe.Add(ref input, 8);
    temp1[4] = -Unsafe.Add(ref input, 3);
    temp1[5] = Unsafe.Add(ref input, 12);
    temp1[6] = Unsafe.Add(ref input, 4);
    temp1[7] = -Unsafe.Add(ref input, 11);
    temp1[8] = -Unsafe.Add(ref input, 1);
    temp1[9] = Unsafe.Add(ref input, 14);
    temp1[10] = Unsafe.Add(ref input, 6);
    temp1[11] = -Unsafe.Add(ref input, 9);
    temp1[12] = Unsafe.Add(ref input, 2);
    temp1[13] = -Unsafe.Add(ref input, 13);
    temp1[14] = -Unsafe.Add(ref input, 5);
    temp1[15] = Unsafe.Add(ref input, 10);

    // stage 2
    Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
    temp0[0] = temp1[0];
    temp0[1] = temp1[1];
    temp0[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[2], cospi[32], temp1[3], cosBit);
    temp0[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[2], -cospi[32], temp1[3], cosBit);
    temp0[4] = temp1[4];
    temp0[5] = temp1[5];
    temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[6], cospi[32], temp1[7], cosBit);
    temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[6], -cospi[32], temp1[7], cosBit);
    temp0[8] = temp1[8];
    temp0[9] = temp1[9];
    temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[10], cospi[32], temp1[11], cosBit);
    temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[10], -cospi[32], temp1[11], cosBit);
    temp0[12] = temp1[12];
    temp0[13] = temp1[13];
    temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[14], cospi[32], temp1[15], cosBit);
    temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[14], -cospi[32], temp1[15], cosBit);

    // stage 3
    temp1[0] = temp0[0] + temp0[2];
    temp1[1] = temp0[1] + temp0[3];
    temp1[2] = temp0[0] - temp0[2];
    temp1[3] = temp0[1] - temp0[3];
    temp1[4] = temp0[4] + temp0[6];
    temp1[5] = temp0[5] + temp0[7];
    temp1[6] = temp0[4] - temp0[6];
    temp1[7] = temp0[5] - temp0[7];
    temp1[8] = temp0[8] + temp0[10];
    temp1[9] = temp0[9] + temp0[11];
    temp1[10] = temp0[8] - temp0[10];
    temp1[11] = temp0[9] - temp0[11];
    temp1[12] = temp0[12] + temp0[14];
    temp1[13] = temp0[13] + temp0[15];
    temp1[14] = temp0[12] - temp0[14];
    temp1[15] = temp0[13] - temp0[15];

    // stage 4
    temp0[0] = temp1[0];
    temp0[1] = temp1[1];
    temp0[2] = temp1[2];
    temp0[3] = temp1[3];
    temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[4], cospi[48], temp1[5], cosBit);
    temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[4], -cospi[16], temp1[5], cosBit);
    temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[6], cospi[16], temp1[7], cosBit);
    temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[6], cospi[48], temp1[7], cosBit);
    temp0[8] = temp1[8];
    temp0[9] = temp1[9];
    temp0[10] = temp1[10];
    temp0[11] = temp1[11];
    temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[12], cospi[48], temp1[13], cosBit);
    temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[12], -cospi[16], temp1[13], cosBit);
    temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[14], cospi[16], temp1[15], cosBit);
    temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[14], cospi[48], temp1[15], cosBit);

    // stage 5
    temp1[0] = temp0[0] + temp0[4];
    temp1[1] = temp0[1] + temp0[5];
    temp1[2] = temp0[2] + temp0[6];
    temp1[3] = temp0[3] + temp0[7];
    temp1[4] = temp0[0] - temp0[4];
    temp1[5] = temp0[1] - temp0[5];
    temp1[6] = temp0[2] - temp0[6];
    temp1[7] = temp0[3] - temp0[7];
    temp1[8] = temp0[8] + temp0[12];
    temp1[9] = temp0[9] + temp0[13];
    temp1[10] = temp0[10] + temp0[14];
    temp1[11] = temp0[11] + temp0[15];
    temp1[12] = temp0[8] - temp0[12];
    temp1[13] = temp0[9] - temp0[13];
    temp1[14] = temp0[10] - temp0[14];
    temp1[15] = temp0[11] - temp0[15];

    // stage 6
    temp0[0] = temp1[0];
    temp0[1] = temp1[1];
    temp0[2] = temp1[2];
    temp0[3] = temp1[3];
    temp0[4] = temp1[4];
    temp0[5] = temp1[5];
    temp0[6] = temp1[6];
    temp0[7] = temp1[7];
    temp0[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp1[8], cospi[56], temp1[9], cosBit);
    temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[8], -cospi[8], temp1[9], cosBit);
    temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp1[10], cospi[24], temp1[11], cosBit);
    temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[10], -cospi[40], temp1[11], cosBit);
    temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp1[12], cospi[8], temp1[13], cosBit);
    temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp1[12], cospi[56], temp1[13], cosBit);
    temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp1[14], cospi[40], temp1[15], cosBit);
    temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp1[14], cospi[24], temp1[15], cosBit);

    // stage 7
    temp1[0] = temp0[0] + temp0[8];
    temp1[1] = temp0[1] + temp0[9];
    temp1[2] = temp0[2] + temp0[10];
    temp1[3] = temp0[3] + temp0[11];
    temp1[4] = temp0[4] + temp0[12];
    temp1[5] = temp0[5] + temp0[13];
    temp1[6] = temp0[6] + temp0[14];
    temp1[7] = temp0[7] + temp0[15];
    temp1[8] = temp0[0] - temp0[8];
    temp1[9] = temp0[1] - temp0[9];
    temp1[10] = temp0[2] - temp0[10];
    temp1[11] = temp0[3] - temp0[11];
    temp1[12] = temp0[4] - temp0[12];
    temp1[13] = temp0[5] - temp0[13];
    temp1[14] = temp0[6] - temp0[14];
    temp1[15] = temp0[7] - temp0[15];

    // stage 8
    temp0[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[2], temp1[0], cospi[62], temp1[1], cosBit);
    temp0[1] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[62], temp1[0], -cospi[2], temp1[1], cosBit);
    temp0[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[10], temp1[2], cospi[54], temp1[3], cosBit);
    temp0[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[54], temp1[2], -cospi[10], temp1[3], cosBit);
    temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[18], temp1[4], cospi[46], temp1[5], cosBit);
    temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[46], temp1[4], -cospi[18], temp1[5], cosBit);
    temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[26], temp1[6], cospi[38], temp1[7], cosBit);
    temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[38], temp1[6], -cospi[26], temp1[7], cosBit);
    temp0[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[34], temp1[8], cospi[30], temp1[9], cosBit);
    temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[30], temp1[8], -cospi[34], temp1[9], cosBit);
    temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[42], temp1[10], cospi[22], temp1[11], cosBit);
    temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[22], temp1[10], -cospi[42], temp1[11], cosBit);
    temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[50], temp1[12], cospi[14], temp1[13], cosBit);
    temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[14], temp1[12], -cospi[50], temp1[13], cosBit);
    temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[58], temp1[14], cospi[6], temp1[15], cosBit);
    temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[6], temp1[14], -cospi[58], temp1[15], cosBit);

    // stage 9: output permutation.
    output = temp0[1];
    Unsafe.Add(ref output, 1) = temp0[14];
    Unsafe.Add(ref output, 2) = temp0[3];
    Unsafe.Add(ref output, 3) = temp0[12];
    Unsafe.Add(ref output, 4) = temp0[5];
    Unsafe.Add(ref output, 5) = temp0[10];
    Unsafe.Add(ref output, 6) = temp0[7];
    Unsafe.Add(ref output, 7) = temp0[8];
    Unsafe.Add(ref output, 8) = temp0[9];
    Unsafe.Add(ref output, 9) = temp0[6];
    Unsafe.Add(ref output, 10) = temp0[11];
    Unsafe.Add(ref output, 11) = temp0[4];
    Unsafe.Add(ref output, 12) = temp0[13];
    Unsafe.Add(ref output, 13) = temp0[2];
    Unsafe.Add(ref output, 14) = temp0[15];
    Unsafe.Add(ref output, 15) = temp0[0];
}
}

387
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst32Forward1dTransformer.cs

@ -1,10 +1,395 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
internal class Av1Adst32Forward1dTransformer : IAv1Forward1dTransformer
{
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
=> throw new NotImplementedException();
=> TransformScalar(ref input, ref output, cosBit);
private static void TransformScalar(ref int input, ref int outputRef, int cosBit)
{
Span<int> temp0 = stackalloc int[32];
Span<int> temp1 = stackalloc int[32];
// stage 0;
// stage 1;
temp1[0] = Unsafe.Add(ref input, 31);
temp1[1] = input;
temp1[2] = Unsafe.Add(ref input, 29);
temp1[3] = Unsafe.Add(ref input, 2);
temp1[4] = Unsafe.Add(ref input, 27);
temp1[5] = Unsafe.Add(ref input, 4);
temp1[6] = Unsafe.Add(ref input, 25);
temp1[7] = Unsafe.Add(ref input, 6);
temp1[8] = Unsafe.Add(ref input, 23);
temp1[9] = Unsafe.Add(ref input, 8);
temp1[10] = Unsafe.Add(ref input, 21);
temp1[11] = Unsafe.Add(ref input, 10);
temp1[12] = Unsafe.Add(ref input, 19);
temp1[13] = Unsafe.Add(ref input, 12);
temp1[14] = Unsafe.Add(ref input, 17);
temp1[15] = Unsafe.Add(ref input, 14);
temp1[16] = Unsafe.Add(ref input, 15);
temp1[17] = Unsafe.Add(ref input, 16);
temp1[18] = Unsafe.Add(ref input, 13);
temp1[19] = Unsafe.Add(ref input, 18);
temp1[20] = Unsafe.Add(ref input, 11);
temp1[21] = Unsafe.Add(ref input, 20);
temp1[22] = Unsafe.Add(ref input, 9);
temp1[23] = Unsafe.Add(ref input, 22);
temp1[24] = Unsafe.Add(ref input, 7);
temp1[25] = Unsafe.Add(ref input, 24);
temp1[26] = Unsafe.Add(ref input, 5);
temp1[27] = Unsafe.Add(ref input, 26);
temp1[28] = Unsafe.Add(ref input, 3);
temp1[29] = Unsafe.Add(ref input, 28);
temp1[30] = Unsafe.Add(ref input, 1);
temp1[31] = Unsafe.Add(ref input, 30);
// stage 2
Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
temp0[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[1], temp1[0], cospi[63], temp1[1], cosBit);
temp0[1] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[1], temp1[1], cospi[63], temp1[0], cosBit);
temp0[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[5], temp1[2], cospi[59], temp1[3], cosBit);
temp0[3] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[5], temp1[3], cospi[59], temp1[2], cosBit);
temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[9], temp1[4], cospi[55], temp1[5], cosBit);
temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[9], temp1[5], cospi[55], temp1[4], cosBit);
temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[13], temp1[6], cospi[51], temp1[7], cosBit);
temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[13], temp1[7], cospi[51], temp1[6], cosBit);
temp0[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[17], temp1[8], cospi[47], temp1[9], cosBit);
temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[17], temp1[9], cospi[47], temp1[8], cosBit);
temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[21], temp1[10], cospi[43], temp1[11], cosBit);
temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[21], temp1[11], cospi[43], temp1[10], cosBit);
temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[25], temp1[12], cospi[39], temp1[13], cosBit);
temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[25], temp1[13], cospi[39], temp1[12], cosBit);
temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[29], temp1[14], cospi[35], temp1[15], cosBit);
temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[29], temp1[15], cospi[35], temp1[14], cosBit);
temp0[16] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[33], temp1[16], cospi[31], temp1[17], cosBit);
temp0[17] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[33], temp1[17], cospi[31], temp1[16], cosBit);
temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[37], temp1[18], cospi[27], temp1[19], cosBit);
temp0[19] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[37], temp1[19], cospi[27], temp1[18], cosBit);
temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[41], temp1[20], cospi[23], temp1[21], cosBit);
temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[41], temp1[21], cospi[23], temp1[20], cosBit);
temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[45], temp1[22], cospi[19], temp1[23], cosBit);
temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[45], temp1[23], cospi[19], temp1[22], cosBit);
temp0[24] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[49], temp1[24], cospi[15], temp1[25], cosBit);
temp0[25] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[49], temp1[25], cospi[15], temp1[24], cosBit);
temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[53], temp1[26], cospi[11], temp1[27], cosBit);
temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[53], temp1[27], cospi[11], temp1[26], cosBit);
temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[57], temp1[28], cospi[7], temp1[29], cosBit);
temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[57], temp1[29], cospi[7], temp1[28], cosBit);
temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[61], temp1[30], cospi[3], temp1[31], cosBit);
temp0[31] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[61], temp1[31], cospi[3], temp1[30], cosBit);
// stage 3
temp1[0] = temp0[0] + temp0[16];
temp1[1] = temp0[1] + temp0[17];
temp1[2] = temp0[2] + temp0[18];
temp1[3] = temp0[3] + temp0[19];
temp1[4] = temp0[4] + temp0[20];
temp1[5] = temp0[5] + temp0[21];
temp1[6] = temp0[6] + temp0[22];
temp1[7] = temp0[7] + temp0[23];
temp1[8] = temp0[8] + temp0[24];
temp1[9] = temp0[9] + temp0[25];
temp1[10] = temp0[10] + temp0[26];
temp1[11] = temp0[11] + temp0[27];
temp1[12] = temp0[12] + temp0[28];
temp1[13] = temp0[13] + temp0[29];
temp1[14] = temp0[14] + temp0[30];
temp1[15] = temp0[15] + temp0[31];
temp1[16] = -temp0[16] + temp0[0];
temp1[17] = -temp0[17] + temp0[1];
temp1[18] = -temp0[18] + temp0[2];
temp1[19] = -temp0[19] + temp0[3];
temp1[20] = -temp0[20] + temp0[4];
temp1[21] = -temp0[21] + temp0[5];
temp1[22] = -temp0[22] + temp0[6];
temp1[23] = -temp0[23] + temp0[7];
temp1[24] = -temp0[24] + temp0[8];
temp1[25] = -temp0[25] + temp0[9];
temp1[26] = -temp0[26] + temp0[10];
temp1[27] = -temp0[27] + temp0[11];
temp1[28] = -temp0[28] + temp0[12];
temp1[29] = -temp0[29] + temp0[13];
temp1[30] = -temp0[30] + temp0[14];
temp1[31] = -temp0[31] + temp0[15];
// stage 4
temp0[0] = temp1[0];
temp0[1] = temp1[1];
temp0[2] = temp1[2];
temp0[3] = temp1[3];
temp0[4] = temp1[4];
temp0[5] = temp1[5];
temp0[6] = temp1[6];
temp0[7] = temp1[7];
temp0[8] = temp1[8];
temp0[9] = temp1[9];
temp0[10] = temp1[10];
temp0[11] = temp1[11];
temp0[12] = temp1[12];
temp0[13] = temp1[13];
temp0[14] = temp1[14];
temp0[15] = temp1[15];
temp0[16] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[4], temp1[16], cospi[60], temp1[17], cosBit);
temp0[17] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[4], temp1[17], cospi[60], temp1[16], cosBit);
temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[20], temp1[18], cospi[44], temp1[19], cosBit);
temp0[19] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[20], temp1[19], cospi[44], temp1[18], cosBit);
temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[36], temp1[20], cospi[28], temp1[21], cosBit);
temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[36], temp1[21], cospi[28], temp1[20], cosBit);
temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[52], temp1[22], cospi[12], temp1[23], cosBit);
temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[52], temp1[23], cospi[12], temp1[22], cosBit);
temp0[24] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[60], temp1[24], cospi[4], temp1[25], cosBit);
temp0[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp1[25], cospi[4], temp1[24], cosBit);
temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[44], temp1[26], cospi[20], temp1[27], cosBit);
temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp1[27], cospi[20], temp1[26], cosBit);
temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[28], temp1[28], cospi[36], temp1[29], cosBit);
temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp1[29], cospi[36], temp1[28], cosBit);
temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[12], temp1[30], cospi[52], temp1[31], cosBit);
temp0[31] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp1[31], cospi[52], temp1[30], cosBit);
// stage 5
temp1[0] = temp0[0] + temp0[8];
temp1[1] = temp0[1] + temp0[9];
temp1[2] = temp0[2] + temp0[10];
temp1[3] = temp0[3] + temp0[11];
temp1[4] = temp0[4] + temp0[12];
temp1[5] = temp0[5] + temp0[13];
temp1[6] = temp0[6] + temp0[14];
temp1[7] = temp0[7] + temp0[15];
temp1[8] = -temp0[8] + temp0[0];
temp1[9] = -temp0[9] + temp0[1];
temp1[10] = -temp0[10] + temp0[2];
temp1[11] = -temp0[11] + temp0[3];
temp1[12] = -temp0[12] + temp0[4];
temp1[13] = -temp0[13] + temp0[5];
temp1[14] = -temp0[14] + temp0[6];
temp1[15] = -temp0[15] + temp0[7];
temp1[16] = temp0[16] + temp0[24];
temp1[17] = temp0[17] + temp0[25];
temp1[18] = temp0[18] + temp0[26];
temp1[19] = temp0[19] + temp0[27];
temp1[20] = temp0[20] + temp0[28];
temp1[21] = temp0[21] + temp0[29];
temp1[22] = temp0[22] + temp0[30];
temp1[23] = temp0[23] + temp0[31];
temp1[24] = -temp0[24] + temp0[16];
temp1[25] = -temp0[25] + temp0[17];
temp1[26] = -temp0[26] + temp0[18];
temp1[27] = -temp0[27] + temp0[19];
temp1[28] = -temp0[28] + temp0[20];
temp1[29] = -temp0[29] + temp0[21];
temp1[30] = -temp0[30] + temp0[22];
temp1[31] = -temp0[31] + temp0[23];
// stage 6
temp0[0] = temp1[0];
temp0[1] = temp1[1];
temp0[2] = temp1[2];
temp0[3] = temp1[3];
temp0[4] = temp1[4];
temp0[5] = temp1[5];
temp0[6] = temp1[6];
temp0[7] = temp1[7];
temp0[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp1[8], cospi[56], temp1[9], cosBit);
temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp1[9], cospi[56], temp1[8], cosBit);
temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp1[10], cospi[24], temp1[11], cosBit);
temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp1[11], cospi[24], temp1[10], cosBit);
temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp1[12], cospi[8], temp1[13], cosBit);
temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[13], cospi[8], temp1[12], cosBit);
temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp1[14], cospi[40], temp1[15], cosBit);
temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[15], cospi[40], temp1[14], cosBit);
temp0[16] = temp1[16];
temp0[17] = temp1[17];
temp0[18] = temp1[18];
temp0[19] = temp1[19];
temp0[20] = temp1[20];
temp0[21] = temp1[21];
temp0[22] = temp1[22];
temp0[23] = temp1[23];
temp0[24] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp1[24], cospi[56], temp1[25], cosBit);
temp0[25] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp1[25], cospi[56], temp1[24], cosBit);
temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp1[26], cospi[24], temp1[27], cosBit);
temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp1[27], cospi[24], temp1[26], cosBit);
temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp1[28], cospi[8], temp1[29], cosBit);
temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[29], cospi[8], temp1[28], cosBit);
temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp1[30], cospi[40], temp1[31], cosBit);
temp0[31] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[31], cospi[40], temp1[30], cosBit);
// stage 7
temp1[0] = temp0[0] + temp0[4];
temp1[1] = temp0[1] + temp0[5];
temp1[2] = temp0[2] + temp0[6];
temp1[3] = temp0[3] + temp0[7];
temp1[4] = -temp0[4] + temp0[0];
temp1[5] = -temp0[5] + temp0[1];
temp1[6] = -temp0[6] + temp0[2];
temp1[7] = -temp0[7] + temp0[3];
temp1[8] = temp0[8] + temp0[12];
temp1[9] = temp0[9] + temp0[13];
temp1[10] = temp0[10] + temp0[14];
temp1[11] = temp0[11] + temp0[15];
temp1[12] = -temp0[12] + temp0[8];
temp1[13] = -temp0[13] + temp0[9];
temp1[14] = -temp0[14] + temp0[10];
temp1[15] = -temp0[15] + temp0[11];
temp1[16] = temp0[16] + temp0[20];
temp1[17] = temp0[17] + temp0[21];
temp1[18] = temp0[18] + temp0[22];
temp1[19] = temp0[19] + temp0[23];
temp1[20] = -temp0[20] + temp0[16];
temp1[21] = -temp0[21] + temp0[17];
temp1[22] = -temp0[22] + temp0[18];
temp1[23] = -temp0[23] + temp0[19];
temp1[24] = temp0[24] + temp0[28];
temp1[25] = temp0[25] + temp0[29];
temp1[26] = temp0[26] + temp0[30];
temp1[27] = temp0[27] + temp0[31];
temp1[28] = -temp0[28] + temp0[24];
temp1[29] = -temp0[29] + temp0[25];
temp1[30] = -temp0[30] + temp0[26];
temp1[31] = -temp0[31] + temp0[27];
// stage 8
temp0[0] = temp1[0];
temp0[1] = temp1[1];
temp0[2] = temp1[2];
temp0[3] = temp1[3];
temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[4], cospi[48], temp1[5], cosBit);
temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[5], cospi[48], temp1[4], cosBit);
temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[6], cospi[16], temp1[7], cosBit);
temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[7], cospi[16], temp1[6], cosBit);
temp0[8] = temp1[8];
temp0[9] = temp1[9];
temp0[10] = temp1[10];
temp0[11] = temp1[11];
temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[12], cospi[48], temp1[13], cosBit);
temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[13], cospi[48], temp1[12], cosBit);
temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[14], cospi[16], temp1[15], cosBit);
temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[15], cospi[16], temp1[14], cosBit);
temp0[16] = temp1[16];
temp0[17] = temp1[17];
temp0[18] = temp1[18];
temp0[19] = temp1[19];
temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[20], cospi[48], temp1[21], cosBit);
temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[21], cospi[48], temp1[20], cosBit);
temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[22], cospi[16], temp1[23], cosBit);
temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[23], cospi[16], temp1[22], cosBit);
temp0[24] = temp1[24];
temp0[25] = temp1[25];
temp0[26] = temp1[26];
temp0[27] = temp1[27];
temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[28], cospi[48], temp1[29], cosBit);
temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[29], cospi[48], temp1[28], cosBit);
temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[30], cospi[16], temp1[31], cosBit);
temp0[31] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[31], cospi[16], temp1[30], cosBit);
// stage 9
temp1[0] = temp0[0] + temp0[2];
temp1[1] = temp0[1] + temp0[3];
temp1[2] = -temp0[2] + temp0[0];
temp1[3] = -temp0[3] + temp0[1];
temp1[4] = temp0[4] + temp0[6];
temp1[5] = temp0[5] + temp0[7];
temp1[6] = -temp0[6] + temp0[4];
temp1[7] = -temp0[7] + temp0[5];
temp1[8] = temp0[8] + temp0[10];
temp1[9] = temp0[9] + temp0[11];
temp1[10] = -temp0[10] + temp0[8];
temp1[11] = -temp0[11] + temp0[9];
temp1[12] = temp0[12] + temp0[14];
temp1[13] = temp0[13] + temp0[15];
temp1[14] = -temp0[14] + temp0[12];
temp1[15] = -temp0[15] + temp0[13];
temp1[16] = temp0[16] + temp0[18];
temp1[17] = temp0[17] + temp0[19];
temp1[18] = -temp0[18] + temp0[16];
temp1[19] = -temp0[19] + temp0[17];
temp1[20] = temp0[20] + temp0[22];
temp1[21] = temp0[21] + temp0[23];
temp1[22] = -temp0[22] + temp0[20];
temp1[23] = -temp0[23] + temp0[21];
temp1[24] = temp0[24] + temp0[26];
temp1[25] = temp0[25] + temp0[27];
temp1[26] = -temp0[26] + temp0[24];
temp1[27] = -temp0[27] + temp0[25];
temp1[28] = temp0[28] + temp0[30];
temp1[29] = temp0[29] + temp0[31];
temp1[30] = -temp0[30] + temp0[28];
temp1[31] = -temp0[31] + temp0[29];
// stage 10
temp0[0] = temp1[0];
temp0[1] = temp1[1];
temp0[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[2], cospi[32], temp1[3], cosBit);
temp0[3] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[3], cospi[32], temp1[2], cosBit);
temp0[4] = temp1[4];
temp0[5] = temp1[5];
temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[6], cospi[32], temp1[7], cosBit);
temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[7], cospi[32], temp1[6], cosBit);
temp0[8] = temp1[8];
temp0[9] = temp1[9];
temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[10], cospi[32], temp1[11], cosBit);
temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[11], cospi[32], temp1[10], cosBit);
temp0[12] = temp1[12];
temp0[13] = temp1[13];
temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[14], cospi[32], temp1[15], cosBit);
temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[15], cospi[32], temp1[14], cosBit);
temp0[16] = temp1[16];
temp0[17] = temp1[17];
temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[18], cospi[32], temp1[19], cosBit);
temp0[19] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[19], cospi[32], temp1[18], cosBit);
temp0[20] = temp1[20];
temp0[21] = temp1[21];
temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[22], cospi[32], temp1[23], cosBit);
temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[23], cospi[32], temp1[22], cosBit);
temp0[24] = temp1[24];
temp0[25] = temp1[25];
temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[26], cospi[32], temp1[27], cosBit);
temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[27], cospi[32], temp1[26], cosBit);
temp0[28] = temp1[28];
temp0[29] = temp1[29];
temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[30], cospi[32], temp1[31], cosBit);
temp0[31] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[31], cospi[32], temp1[30], cosBit);
// stage 11
outputRef = temp0[0];
Unsafe.Add(ref outputRef, 1) = -temp0[16];
Unsafe.Add(ref outputRef, 2) = temp0[24];
Unsafe.Add(ref outputRef, 3) = -temp0[8];
Unsafe.Add(ref outputRef, 4) = temp0[12];
Unsafe.Add(ref outputRef, 5) = -temp0[28];
Unsafe.Add(ref outputRef, 6) = temp0[20];
Unsafe.Add(ref outputRef, 7) = -temp0[4];
Unsafe.Add(ref outputRef, 8) = temp0[6];
Unsafe.Add(ref outputRef, 9) = -temp0[22];
Unsafe.Add(ref outputRef, 10) = temp0[30];
Unsafe.Add(ref outputRef, 11) = -temp0[14];
Unsafe.Add(ref outputRef, 12) = temp0[10];
Unsafe.Add(ref outputRef, 13) = -temp0[26];
Unsafe.Add(ref outputRef, 14) = temp0[18];
Unsafe.Add(ref outputRef, 15) = -temp0[2];
Unsafe.Add(ref outputRef, 16) = temp0[3];
Unsafe.Add(ref outputRef, 17) = -temp0[19];
Unsafe.Add(ref outputRef, 18) = temp0[27];
Unsafe.Add(ref outputRef, 19) = -temp0[11];
Unsafe.Add(ref outputRef, 20) = temp0[15];
Unsafe.Add(ref outputRef, 21) = -temp0[31];
Unsafe.Add(ref outputRef, 22) = temp0[23];
Unsafe.Add(ref outputRef, 23) = -temp0[7];
Unsafe.Add(ref outputRef, 24) = temp0[5];
Unsafe.Add(ref outputRef, 25) = -temp0[21];
Unsafe.Add(ref outputRef, 26) = temp0[29];
Unsafe.Add(ref outputRef, 27) = -temp0[13];
Unsafe.Add(ref outputRef, 28) = temp0[9];
Unsafe.Add(ref outputRef, 29) = -temp0[25];
Unsafe.Add(ref outputRef, 30) = temp0[17];
Unsafe.Add(ref outputRef, 31) = -temp0[1];
}
}

64
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst4Forward1dTransformer.cs

@ -1,10 +1,72 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
/// <summary>
/// 1-dimensional forward ADST over 4 coefficients.
/// </summary>
internal class Av1Adst4Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Transforms the 4 values starting at <paramref name="input"/> and writes
    /// the 4 results starting at <paramref name="output"/>.
    /// </summary>
    /// <param name="input">Reference to the first of 4 consecutive input values.</param>
    /// <param name="output">Reference to the first of 4 consecutive output slots.</param>
    /// <param name="cosBit">
    /// Passed to <c>Av1SinusConstants.SinusPi</c> to obtain the sine table, and used as the
    /// rounding shift applied to every output value.
    /// </param>
    /// <param name="stageRange">Not used by this implementation.</param>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
        => TransformScalar(ref input, ref output, cosBit);

    private static void TransformScalar(ref int input, ref int output, int cosBit)
    {
        Span<int> sinpi = Av1SinusConstants.SinusPi(cosBit);
        int x0, x1, x2, x3;
        int s0, s1, s2, s3, s4, s5, s6, s7;

        // stage 0: load all four inputs before anything is written to the output.
        x0 = input;
        x1 = Unsafe.Add(ref input, 1);
        x2 = Unsafe.Add(ref input, 2);
        x3 = Unsafe.Add(ref input, 3);

        // Shortcut: an all-zero input produces an all-zero output.
        if ((x0 | x1 | x2 | x3) == 0)
        {
            output = 0;
            Unsafe.Add(ref output, 1) = 0;
            Unsafe.Add(ref output, 2) = 0;
            Unsafe.Add(ref output, 3) = 0;
            return;
        }

        // stage 1
        s0 = sinpi[1] * x0;
        s1 = sinpi[4] * x0;
        s2 = sinpi[2] * x1;
        s3 = sinpi[1] * x1;
        s4 = sinpi[3] * x2;
        s5 = sinpi[4] * x3;
        s6 = sinpi[2] * x3;
        s7 = x0 + x1;

        // stage 2
        s7 -= x3;

        // stage 3
        x0 = s0 + s2;
        x1 = sinpi[3] * s7;
        x2 = s1 - s3;
        x3 = s4;

        // stage 4
        x0 += s5;
        x2 += s6;

        // stage 5
        s0 = x0 + x3;
        s1 = x1;
        s2 = x2 - x3;
        s3 = x2 - x0;

        // stage 6
        s3 += x3;

        // 1-D transform scaling factor is sqrt(2).
        output = Av1Math.RoundShift(s0, cosBit);
        Unsafe.Add(ref output, 1) = Av1Math.RoundShift(s1, cosBit);
        Unsafe.Add(ref output, 2) = Av1Math.RoundShift(s2, cosBit);
        Unsafe.Add(ref output, 3) = Av1Math.RoundShift(s3, cosBit);
    }
}

84
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst8Forward1dTransformer.cs

@ -1,10 +1,92 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
/// <summary>
/// 1-dimensional forward ADST over 8 coefficients.
/// </summary>
internal class Av1Adst8Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Transforms the 8 values starting at <paramref name="input"/> and writes
    /// the 8 results starting at <paramref name="output"/>.
    /// </summary>
    /// <param name="input">Reference to the first of 8 consecutive input values.</param>
    /// <param name="output">
    /// Reference to the first of 8 consecutive output slots. Must not be the same location as
    /// <paramref name="input"/>; the guard in the implementation rejects that case.
    /// </param>
    /// <param name="cosBit">
    /// Passed to <c>Av1SinusConstants.CosinusPi</c> to obtain the cosine table, and forwarded
    /// to every <c>HalfButterfly</c> call.
    /// </param>
    /// <param name="stageRange">Not used by this implementation.</param>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
        => TransformScalar(ref input, ref output, cosBit);

    private static void TransformScalar(ref int input, ref int output, int cosBit)
    {
        // Compare the references, not the values they point at: testing `output == input`
        // would reject distinct buffers whose first elements merely happen to be equal.
        Guard.IsFalse(Unsafe.AreSame(ref input, ref output), nameof(output), "Cannot operate on same buffer for input and output.");

        Span<int> temp0 = stackalloc int[8];
        Span<int> temp1 = stackalloc int[8];

        // stage 0

        // stage 1: permute the input and negate selected elements.
        temp0[0] = input;
        temp0[1] = -Unsafe.Add(ref input, 7);
        temp0[2] = -Unsafe.Add(ref input, 3);
        temp0[3] = Unsafe.Add(ref input, 4);
        temp0[4] = -Unsafe.Add(ref input, 1);
        temp0[5] = Unsafe.Add(ref input, 6);
        temp0[6] = Unsafe.Add(ref input, 2);
        temp0[7] = -Unsafe.Add(ref input, 5);

        // stage 2
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        temp1[0] = temp0[0];
        temp1[1] = temp0[1];
        temp1[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[2], cospi[32], temp0[3], cosBit);
        temp1[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[2], -cospi[32], temp0[3], cosBit);
        temp1[4] = temp0[4];
        temp1[5] = temp0[5];
        temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[6], cospi[32], temp0[7], cosBit);
        temp1[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[6], -cospi[32], temp0[7], cosBit);

        // stage 3
        temp0[0] = temp1[0] + temp1[2];
        temp0[1] = temp1[1] + temp1[3];
        temp0[2] = temp1[0] - temp1[2];
        temp0[3] = temp1[1] - temp1[3];
        temp0[4] = temp1[4] + temp1[6];
        temp0[5] = temp1[5] + temp1[7];
        temp0[6] = temp1[4] - temp1[6];
        temp0[7] = temp1[5] - temp1[7];

        // stage 4
        temp1[0] = temp0[0];
        temp1[1] = temp0[1];
        temp1[2] = temp0[2];
        temp1[3] = temp0[3];
        temp1[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[4], cospi[48], temp0[5], cosBit);
        temp1[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[4], -cospi[16], temp0[5], cosBit);
        temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[6], cospi[16], temp0[7], cosBit);
        temp1[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[6], cospi[48], temp0[7], cosBit);

        // stage 5
        temp0[0] = temp1[0] + temp1[4];
        temp0[1] = temp1[1] + temp1[5];
        temp0[2] = temp1[2] + temp1[6];
        temp0[3] = temp1[3] + temp1[7];
        temp0[4] = temp1[0] - temp1[4];
        temp0[5] = temp1[1] - temp1[5];
        temp0[6] = temp1[2] - temp1[6];
        temp0[7] = temp1[3] - temp1[7];

        // stage 6
        temp1[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[4], temp0[0], cospi[60], temp0[1], cosBit);
        temp1[1] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[0], -cospi[4], temp0[1], cosBit);
        temp1[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[20], temp0[2], cospi[44], temp0[3], cosBit);
        temp1[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[2], -cospi[20], temp0[3], cosBit);
        temp1[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[36], temp0[4], cospi[28], temp0[5], cosBit);
        temp1[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[4], -cospi[36], temp0[5], cosBit);
        temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[52], temp0[6], cospi[12], temp0[7], cosBit);
        temp1[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[6], -cospi[52], temp0[7], cosBit);

        // stage 7: write the results in their final (permuted) order.
        output = temp1[1];
        Unsafe.Add(ref output, 1) = temp1[6];
        Unsafe.Add(ref output, 2) = temp1[3];
        Unsafe.Add(ref output, 3) = temp1[4];
        Unsafe.Add(ref output, 4) = temp1[5];
        Unsafe.Add(ref output, 5) = temp1[2];
        Unsafe.Add(ref output, 6) = temp1[7];
        Unsafe.Add(ref output, 7) = temp1[0];
    }
}

139
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct16Forward1dTransformer.cs

@ -1,10 +1,147 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
/// <summary>
/// 1-dimensional forward DCT over 16 coefficients.
/// </summary>
internal class Av1Dct16Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Transforms the 16 values starting at <paramref name="input"/> and writes
    /// the 16 results starting at <paramref name="output"/>.
    /// </summary>
    /// <param name="input">Reference to the first of 16 consecutive input values.</param>
    /// <param name="output">Reference to the first of 16 consecutive output slots.</param>
    /// <param name="cosBit">
    /// Passed to <c>Av1SinusConstants.CosinusPi</c> to obtain the cosine table, and forwarded
    /// to every <c>HalfButterfly</c> call.
    /// </param>
    /// <param name="stageRange">Not used by this implementation.</param>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
        => TransformScalar(ref input, ref output, cosBit);

    private static void TransformScalar(ref int input, ref int output, int cosBit)
    {
        // Two scratch buffers used alternately: each stage reads one and writes the other.
        Span<int> temp0 = stackalloc int[16];
        Span<int> temp1 = stackalloc int[16];

        // stage 0

        // stage 1: sums of mirrored input pairs go to [0..7], differences to [8..15].
        temp0[0] = Unsafe.Add(ref input, 0) + Unsafe.Add(ref input, 15);
        temp0[1] = Unsafe.Add(ref input, 1) + Unsafe.Add(ref input, 14);
        temp0[2] = Unsafe.Add(ref input, 2) + Unsafe.Add(ref input, 13);
        temp0[3] = Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 12);
        temp0[4] = Unsafe.Add(ref input, 4) + Unsafe.Add(ref input, 11);
        temp0[5] = Unsafe.Add(ref input, 5) + Unsafe.Add(ref input, 10);
        temp0[6] = Unsafe.Add(ref input, 6) + Unsafe.Add(ref input, 9);
        temp0[7] = Unsafe.Add(ref input, 7) + Unsafe.Add(ref input, 8);
        temp0[8] = -Unsafe.Add(ref input, 8) + Unsafe.Add(ref input, 7);
        temp0[9] = -Unsafe.Add(ref input, 9) + Unsafe.Add(ref input, 6);
        temp0[10] = -Unsafe.Add(ref input, 10) + Unsafe.Add(ref input, 5);
        temp0[11] = -Unsafe.Add(ref input, 11) + Unsafe.Add(ref input, 4);
        temp0[12] = -Unsafe.Add(ref input, 12) + Unsafe.Add(ref input, 3);
        temp0[13] = -Unsafe.Add(ref input, 13) + Unsafe.Add(ref input, 2);
        temp0[14] = -Unsafe.Add(ref input, 14) + Unsafe.Add(ref input, 1);
        temp0[15] = -Unsafe.Add(ref input, 15) + Unsafe.Add(ref input, 0);

        // stage 2
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        temp1[0] = temp0[0] + temp0[7];
        temp1[1] = temp0[1] + temp0[6];
        temp1[2] = temp0[2] + temp0[5];
        temp1[3] = temp0[3] + temp0[4];
        temp1[4] = -temp0[4] + temp0[3];
        temp1[5] = -temp0[5] + temp0[2];
        temp1[6] = -temp0[6] + temp0[1];
        temp1[7] = -temp0[7] + temp0[0];
        temp1[8] = temp0[8];
        temp1[9] = temp0[9];
        temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[10], cospi[32], temp0[13], cosBit);
        temp1[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[11], cospi[32], temp0[12], cosBit);
        temp1[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[12], cospi[32], temp0[11], cosBit);
        temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[13], cospi[32], temp0[10], cosBit);
        temp1[14] = temp0[14];
        temp1[15] = temp0[15];

        // stage 3
        temp0[0] = temp1[0] + temp1[3];
        temp0[1] = temp1[1] + temp1[2];
        temp0[2] = -temp1[2] + temp1[1];
        temp0[3] = -temp1[3] + temp1[0];
        temp0[4] = temp1[4];
        temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[5], cospi[32], temp1[6], cosBit);
        temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[6], cospi[32], temp1[5], cosBit);
        temp0[7] = temp1[7];
        temp0[8] = temp1[8] + temp1[11];
        temp0[9] = temp1[9] + temp1[10];
        temp0[10] = -temp1[10] + temp1[9];
        temp0[11] = -temp1[11] + temp1[8];
        temp0[12] = -temp1[12] + temp1[15];
        temp0[13] = -temp1[13] + temp1[14];
        temp0[14] = temp1[14] + temp1[13];
        temp0[15] = temp1[15] + temp1[12];

        // stage 4
        temp1[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[0], cospi[32], temp0[1], cosBit);
        temp1[1] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[1], cospi[32], temp0[0], cosBit);
        temp1[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[2], cospi[16], temp0[3], cosBit);
        temp1[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[3], -cospi[16], temp0[2], cosBit);
        temp1[4] = temp0[4] + temp0[5];
        temp1[5] = -temp0[5] + temp0[4];
        temp1[6] = -temp0[6] + temp0[7];
        temp1[7] = temp0[7] + temp0[6];
        temp1[8] = temp0[8];
        temp1[9] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[9], cospi[48], temp0[14], cosBit);
        temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[10], -cospi[16], temp0[13], cosBit);
        temp1[11] = temp0[11];
        temp1[12] = temp0[12];
        temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[13], -cospi[16], temp0[10], cosBit);
        temp1[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[14], cospi[48], temp0[9], cosBit);
        temp1[15] = temp0[15];

        // stage 5
        temp0[0] = temp1[0];
        temp0[1] = temp1[1];
        temp0[2] = temp1[2];
        temp0[3] = temp1[3];
        temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[4], cospi[8], temp1[7], cosBit);
        temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[5], cospi[40], temp1[6], cosBit);
        temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[6], -cospi[40], temp1[5], cosBit);
        temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[7], -cospi[8], temp1[4], cosBit);
        temp0[8] = temp1[8] + temp1[9];
        temp0[9] = -temp1[9] + temp1[8];
        temp0[10] = -temp1[10] + temp1[11];
        temp0[11] = temp1[11] + temp1[10];
        temp0[12] = temp1[12] + temp1[13];
        temp0[13] = -temp1[13] + temp1[12];
        temp0[14] = -temp1[14] + temp1[15];
        temp0[15] = temp1[15] + temp1[14];

        // stage 6
        temp1[0] = temp0[0];
        temp1[1] = temp0[1];
        temp1[2] = temp0[2];
        temp1[3] = temp0[3];
        temp1[4] = temp0[4];
        temp1[5] = temp0[5];
        temp1[6] = temp0[6];
        temp1[7] = temp0[7];
        temp1[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[8], cospi[4], temp0[15], cosBit);
        temp1[9] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[9], cospi[36], temp0[14], cosBit);
        temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[10], cospi[20], temp0[13], cosBit);
        temp1[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[11], cospi[52], temp0[12], cosBit);
        temp1[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[12], -cospi[52], temp0[11], cosBit);
        temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[13], -cospi[20], temp0[10], cosBit);
        temp1[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[14], -cospi[36], temp0[9], cosBit);
        temp1[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[15], -cospi[4], temp0[8], cosBit);

        // stage 7: output index i receives the scratch element at the 4-bit bit-reversal of i.
        output = temp1[0];
        Unsafe.Add(ref output, 1) = temp1[8];
        Unsafe.Add(ref output, 2) = temp1[4];
        Unsafe.Add(ref output, 3) = temp1[12];
        Unsafe.Add(ref output, 4) = temp1[2];
        Unsafe.Add(ref output, 5) = temp1[10];
        Unsafe.Add(ref output, 6) = temp1[6];
        Unsafe.Add(ref output, 7) = temp1[14];
        Unsafe.Add(ref output, 8) = temp1[1];
        Unsafe.Add(ref output, 9) = temp1[9];
        Unsafe.Add(ref output, 10) = temp1[5];
        Unsafe.Add(ref output, 11) = temp1[13];
        Unsafe.Add(ref output, 12) = temp1[3];
        Unsafe.Add(ref output, 13) = temp1[11];
        Unsafe.Add(ref output, 14) = temp1[7];
        Unsafe.Add(ref output, 15) = temp1[15];
    }
}

319
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct32Forward1dTransformer.cs

@ -1,10 +1,327 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
/// <summary>
/// 1-dimensional forward DCT over 32 coefficients.
/// </summary>
internal class Av1Dct32Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Transforms the 32 values starting at <paramref name="input"/> and writes
    /// the 32 results starting at <paramref name="output"/>.
    /// </summary>
    /// <param name="input">Reference to the first of 32 consecutive input values.</param>
    /// <param name="output">Reference to the first of 32 consecutive output slots.</param>
    /// <param name="cosBit">
    /// Passed to <c>Av1SinusConstants.CosinusPi</c> to obtain the cosine table, and forwarded
    /// to every <c>HalfButterfly</c> call.
    /// </param>
    /// <param name="stageRange">Not used by this implementation.</param>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
        => TransformScalar(ref input, ref output, cosBit);

    private static void TransformScalar(ref int input, ref int output, int cosBit)
    {
        // Two scratch buffers used alternately: each stage reads one and writes the other.
        Span<int> temp0 = stackalloc int[32];
        Span<int> temp1 = stackalloc int[32];

        // stage 0

        // stage 1: sums of mirrored input pairs go to [0..15], differences to [16..31].
        temp0[0] = Unsafe.Add(ref input, 0) + Unsafe.Add(ref input, 31);
        temp0[1] = Unsafe.Add(ref input, 1) + Unsafe.Add(ref input, 30);
        temp0[2] = Unsafe.Add(ref input, 2) + Unsafe.Add(ref input, 29);
        temp0[3] = Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 28);
        temp0[4] = Unsafe.Add(ref input, 4) + Unsafe.Add(ref input, 27);
        temp0[5] = Unsafe.Add(ref input, 5) + Unsafe.Add(ref input, 26);
        temp0[6] = Unsafe.Add(ref input, 6) + Unsafe.Add(ref input, 25);
        temp0[7] = Unsafe.Add(ref input, 7) + Unsafe.Add(ref input, 24);
        temp0[8] = Unsafe.Add(ref input, 8) + Unsafe.Add(ref input, 23);
        temp0[9] = Unsafe.Add(ref input, 9) + Unsafe.Add(ref input, 22);
        temp0[10] = Unsafe.Add(ref input, 10) + Unsafe.Add(ref input, 21);
        temp0[11] = Unsafe.Add(ref input, 11) + Unsafe.Add(ref input, 20);
        temp0[12] = Unsafe.Add(ref input, 12) + Unsafe.Add(ref input, 19);
        temp0[13] = Unsafe.Add(ref input, 13) + Unsafe.Add(ref input, 18);
        temp0[14] = Unsafe.Add(ref input, 14) + Unsafe.Add(ref input, 17);
        temp0[15] = Unsafe.Add(ref input, 15) + Unsafe.Add(ref input, 16);
        temp0[16] = -Unsafe.Add(ref input, 16) + Unsafe.Add(ref input, 15);
        temp0[17] = -Unsafe.Add(ref input, 17) + Unsafe.Add(ref input, 14);
        temp0[18] = -Unsafe.Add(ref input, 18) + Unsafe.Add(ref input, 13);
        temp0[19] = -Unsafe.Add(ref input, 19) + Unsafe.Add(ref input, 12);
        temp0[20] = -Unsafe.Add(ref input, 20) + Unsafe.Add(ref input, 11);
        temp0[21] = -Unsafe.Add(ref input, 21) + Unsafe.Add(ref input, 10);
        temp0[22] = -Unsafe.Add(ref input, 22) + Unsafe.Add(ref input, 9);
        temp0[23] = -Unsafe.Add(ref input, 23) + Unsafe.Add(ref input, 8);
        temp0[24] = -Unsafe.Add(ref input, 24) + Unsafe.Add(ref input, 7);
        temp0[25] = -Unsafe.Add(ref input, 25) + Unsafe.Add(ref input, 6);
        temp0[26] = -Unsafe.Add(ref input, 26) + Unsafe.Add(ref input, 5);
        temp0[27] = -Unsafe.Add(ref input, 27) + Unsafe.Add(ref input, 4);
        temp0[28] = -Unsafe.Add(ref input, 28) + Unsafe.Add(ref input, 3);
        temp0[29] = -Unsafe.Add(ref input, 29) + Unsafe.Add(ref input, 2);
        temp0[30] = -Unsafe.Add(ref input, 30) + Unsafe.Add(ref input, 1);
        temp0[31] = -Unsafe.Add(ref input, 31) + Unsafe.Add(ref input, 0);

        // stage 2
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        temp1[0] = temp0[0] + temp0[15];
        temp1[1] = temp0[1] + temp0[14];
        temp1[2] = temp0[2] + temp0[13];
        temp1[3] = temp0[3] + temp0[12];
        temp1[4] = temp0[4] + temp0[11];
        temp1[5] = temp0[5] + temp0[10];
        temp1[6] = temp0[6] + temp0[9];
        temp1[7] = temp0[7] + temp0[8];
        temp1[8] = -temp0[8] + temp0[7];
        temp1[9] = -temp0[9] + temp0[6];
        temp1[10] = -temp0[10] + temp0[5];
        temp1[11] = -temp0[11] + temp0[4];
        temp1[12] = -temp0[12] + temp0[3];
        temp1[13] = -temp0[13] + temp0[2];
        temp1[14] = -temp0[14] + temp0[1];
        temp1[15] = -temp0[15] + temp0[0];
        temp1[16] = temp0[16];
        temp1[17] = temp0[17];
        temp1[18] = temp0[18];
        temp1[19] = temp0[19];
        temp1[20] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[20], cospi[32], temp0[27], cosBit);
        temp1[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[21], cospi[32], temp0[26], cosBit);
        temp1[22] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[22], cospi[32], temp0[25], cosBit);
        temp1[23] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[23], cospi[32], temp0[24], cosBit);
        temp1[24] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[24], cospi[32], temp0[23], cosBit);
        temp1[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[25], cospi[32], temp0[22], cosBit);
        temp1[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[26], cospi[32], temp0[21], cosBit);
        temp1[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[27], cospi[32], temp0[20], cosBit);
        temp1[28] = temp0[28];
        temp1[29] = temp0[29];
        temp1[30] = temp0[30];
        temp1[31] = temp0[31];

        // stage 3
        temp0[0] = temp1[0] + temp1[7];
        temp0[1] = temp1[1] + temp1[6];
        temp0[2] = temp1[2] + temp1[5];
        temp0[3] = temp1[3] + temp1[4];
        temp0[4] = -temp1[4] + temp1[3];
        temp0[5] = -temp1[5] + temp1[2];
        temp0[6] = -temp1[6] + temp1[1];
        temp0[7] = -temp1[7] + temp1[0];
        temp0[8] = temp1[8];
        temp0[9] = temp1[9];
        temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[10], cospi[32], temp1[13], cosBit);
        temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[11], cospi[32], temp1[12], cosBit);
        temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[12], cospi[32], temp1[11], cosBit);
        temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[13], cospi[32], temp1[10], cosBit);
        temp0[14] = temp1[14];
        temp0[15] = temp1[15];
        temp0[16] = temp1[16] + temp1[23];
        temp0[17] = temp1[17] + temp1[22];
        temp0[18] = temp1[18] + temp1[21];
        temp0[19] = temp1[19] + temp1[20];
        temp0[20] = -temp1[20] + temp1[19];
        temp0[21] = -temp1[21] + temp1[18];
        temp0[22] = -temp1[22] + temp1[17];
        temp0[23] = -temp1[23] + temp1[16];
        temp0[24] = -temp1[24] + temp1[31];
        temp0[25] = -temp1[25] + temp1[30];
        temp0[26] = -temp1[26] + temp1[29];
        temp0[27] = -temp1[27] + temp1[28];
        temp0[28] = temp1[28] + temp1[27];
        temp0[29] = temp1[29] + temp1[26];
        temp0[30] = temp1[30] + temp1[25];
        temp0[31] = temp1[31] + temp1[24];

        // stage 4
        temp1[0] = temp0[0] + temp0[3];
        temp1[1] = temp0[1] + temp0[2];
        temp1[2] = -temp0[2] + temp0[1];
        temp1[3] = -temp0[3] + temp0[0];
        temp1[4] = temp0[4];
        temp1[5] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[5], cospi[32], temp0[6], cosBit);
        temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[6], cospi[32], temp0[5], cosBit);
        temp1[7] = temp0[7];
        temp1[8] = temp0[8] + temp0[11];
        temp1[9] = temp0[9] + temp0[10];
        temp1[10] = -temp0[10] + temp0[9];
        temp1[11] = -temp0[11] + temp0[8];
        temp1[12] = -temp0[12] + temp0[15];
        temp1[13] = -temp0[13] + temp0[14];
        temp1[14] = temp0[14] + temp0[13];
        temp1[15] = temp0[15] + temp0[12];
        temp1[16] = temp0[16];
        temp1[17] = temp0[17];
        temp1[18] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[18], cospi[48], temp0[29], cosBit);
        temp1[19] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[19], cospi[48], temp0[28], cosBit);
        temp1[20] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[20], -cospi[16], temp0[27], cosBit);
        temp1[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[21], -cospi[16], temp0[26], cosBit);
        temp1[22] = temp0[22];
        temp1[23] = temp0[23];
        temp1[24] = temp0[24];
        temp1[25] = temp0[25];
        temp1[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[26], -cospi[16], temp0[21], cosBit);
        temp1[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[27], -cospi[16], temp0[20], cosBit);
        temp1[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[28], cospi[48], temp0[19], cosBit);
        temp1[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[29], cospi[48], temp0[18], cosBit);
        temp1[30] = temp0[30];
        temp1[31] = temp0[31];

        // stage 5
        temp0[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[0], cospi[32], temp1[1], cosBit);
        temp0[1] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[1], cospi[32], temp1[0], cosBit);
        temp0[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[2], cospi[16], temp1[3], cosBit);
        temp0[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[3], -cospi[16], temp1[2], cosBit);
        temp0[4] = temp1[4] + temp1[5];
        temp0[5] = -temp1[5] + temp1[4];
        temp0[6] = -temp1[6] + temp1[7];
        temp0[7] = temp1[7] + temp1[6];
        temp0[8] = temp1[8];
        temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[9], cospi[48], temp1[14], cosBit);
        temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[10], -cospi[16], temp1[13], cosBit);
        temp0[11] = temp1[11];
        temp0[12] = temp1[12];
        temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[13], -cospi[16], temp1[10], cosBit);
        temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[14], cospi[48], temp1[9], cosBit);
        temp0[15] = temp1[15];
        temp0[16] = temp1[16] + temp1[19];
        temp0[17] = temp1[17] + temp1[18];
        temp0[18] = -temp1[18] + temp1[17];
        temp0[19] = -temp1[19] + temp1[16];
        temp0[20] = -temp1[20] + temp1[23];
        temp0[21] = -temp1[21] + temp1[22];
        temp0[22] = temp1[22] + temp1[21];
        temp0[23] = temp1[23] + temp1[20];
        temp0[24] = temp1[24] + temp1[27];
        temp0[25] = temp1[25] + temp1[26];
        temp0[26] = -temp1[26] + temp1[25];
        temp0[27] = -temp1[27] + temp1[24];
        temp0[28] = -temp1[28] + temp1[31];
        temp0[29] = -temp1[29] + temp1[30];
        temp0[30] = temp1[30] + temp1[29];
        temp0[31] = temp1[31] + temp1[28];

        // stage 6
        temp1[0] = temp0[0];
        temp1[1] = temp0[1];
        temp1[2] = temp0[2];
        temp1[3] = temp0[3];
        temp1[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[4], cospi[8], temp0[7], cosBit);
        temp1[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[5], cospi[40], temp0[6], cosBit);
        temp1[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[6], -cospi[40], temp0[5], cosBit);
        temp1[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[7], -cospi[8], temp0[4], cosBit);
        temp1[8] = temp0[8] + temp0[9];
        temp1[9] = -temp0[9] + temp0[8];
        temp1[10] = -temp0[10] + temp0[11];
        temp1[11] = temp0[11] + temp0[10];
        temp1[12] = temp0[12] + temp0[13];
        temp1[13] = -temp0[13] + temp0[12];
        temp1[14] = -temp0[14] + temp0[15];
        temp1[15] = temp0[15] + temp0[14];
        temp1[16] = temp0[16];
        temp1[17] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp0[17], cospi[56], temp0[30], cosBit);
        temp1[18] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp0[18], -cospi[8], temp0[29], cosBit);
        temp1[19] = temp0[19];
        temp1[20] = temp0[20];
        temp1[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp0[21], cospi[24], temp0[26], cosBit);
        temp1[22] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp0[22], -cospi[40], temp0[25], cosBit);
        temp1[23] = temp0[23];
        temp1[24] = temp0[24];
        temp1[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[25], -cospi[40], temp0[22], cosBit);
        temp1[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp0[26], cospi[24], temp0[21], cosBit);
        temp1[27] = temp0[27];
        temp1[28] = temp0[28];
        temp1[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[29], -cospi[8], temp0[18], cosBit);
        temp1[30] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp0[30], cospi[56], temp0[17], cosBit);
        temp1[31] = temp0[31];

        // stage 7
        temp0[0] = temp1[0];
        temp0[1] = temp1[1];
        temp0[2] = temp1[2];
        temp0[3] = temp1[3];
        temp0[4] = temp1[4];
        temp0[5] = temp1[5];
        temp0[6] = temp1[6];
        temp0[7] = temp1[7];
        temp0[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp1[8], cospi[4], temp1[15], cosBit);
        temp0[9] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp1[9], cospi[36], temp1[14], cosBit);
        temp0[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp1[10], cospi[20], temp1[13], cosBit);
        temp0[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp1[11], cospi[52], temp1[12], cosBit);
        temp0[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp1[12], -cospi[52], temp1[11], cosBit);
        temp0[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp1[13], -cospi[20], temp1[10], cosBit);
        temp0[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp1[14], -cospi[36], temp1[9], cosBit);
        temp0[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp1[15], -cospi[4], temp1[8], cosBit);
        temp0[16] = temp1[16] + temp1[17];
        temp0[17] = -temp1[17] + temp1[16];
        temp0[18] = -temp1[18] + temp1[19];
        temp0[19] = temp1[19] + temp1[18];
        temp0[20] = temp1[20] + temp1[21];
        temp0[21] = -temp1[21] + temp1[20];
        temp0[22] = -temp1[22] + temp1[23];
        temp0[23] = temp1[23] + temp1[22];
        temp0[24] = temp1[24] + temp1[25];
        temp0[25] = -temp1[25] + temp1[24];
        temp0[26] = -temp1[26] + temp1[27];
        temp0[27] = temp1[27] + temp1[26];
        temp0[28] = temp1[28] + temp1[29];
        temp0[29] = -temp1[29] + temp1[28];
        temp0[30] = -temp1[30] + temp1[31];
        temp0[31] = temp1[31] + temp1[30];

        // stage 8
        temp1[0] = temp0[0];
        temp1[1] = temp0[1];
        temp1[2] = temp0[2];
        temp1[3] = temp0[3];
        temp1[4] = temp0[4];
        temp1[5] = temp0[5];
        temp1[6] = temp0[6];
        temp1[7] = temp0[7];
        temp1[8] = temp0[8];
        temp1[9] = temp0[9];
        temp1[10] = temp0[10];
        temp1[11] = temp0[11];
        temp1[12] = temp0[12];
        temp1[13] = temp0[13];
        temp1[14] = temp0[14];
        temp1[15] = temp0[15];
        temp1[16] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[62], temp0[16], cospi[2], temp0[31], cosBit);
        temp1[17] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[30], temp0[17], cospi[34], temp0[30], cosBit);
        temp1[18] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[46], temp0[18], cospi[18], temp0[29], cosBit);
        temp1[19] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[14], temp0[19], cospi[50], temp0[28], cosBit);
        temp1[20] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[54], temp0[20], cospi[10], temp0[27], cosBit);
        temp1[21] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[22], temp0[21], cospi[42], temp0[26], cosBit);
        temp1[22] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[38], temp0[22], cospi[26], temp0[25], cosBit);
        temp1[23] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[6], temp0[23], cospi[58], temp0[24], cosBit);
        temp1[24] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[6], temp0[24], -cospi[58], temp0[23], cosBit);
        temp1[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[38], temp0[25], -cospi[26], temp0[22], cosBit);
        temp1[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[22], temp0[26], -cospi[42], temp0[21], cosBit);
        temp1[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[54], temp0[27], -cospi[10], temp0[20], cosBit);
        temp1[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[14], temp0[28], -cospi[50], temp0[19], cosBit);
        temp1[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[46], temp0[29], -cospi[18], temp0[18], cosBit);
        temp1[30] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[30], temp0[30], -cospi[34], temp0[17], cosBit);
        temp1[31] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[62], temp0[31], -cospi[2], temp0[16], cosBit);

        // stage 9: output index i receives the scratch element at the 5-bit bit-reversal of i.
        Unsafe.Add(ref output, 0) = temp1[0];
        Unsafe.Add(ref output, 1) = temp1[16];
        Unsafe.Add(ref output, 2) = temp1[8];
        Unsafe.Add(ref output, 3) = temp1[24];
        Unsafe.Add(ref output, 4) = temp1[4];
        Unsafe.Add(ref output, 5) = temp1[20];
        Unsafe.Add(ref output, 6) = temp1[12];
        Unsafe.Add(ref output, 7) = temp1[28];
        Unsafe.Add(ref output, 8) = temp1[2];
        Unsafe.Add(ref output, 9) = temp1[18];
        Unsafe.Add(ref output, 10) = temp1[10];
        Unsafe.Add(ref output, 11) = temp1[26];
        Unsafe.Add(ref output, 12) = temp1[6];
        Unsafe.Add(ref output, 13) = temp1[22];
        Unsafe.Add(ref output, 14) = temp1[14];
        Unsafe.Add(ref output, 15) = temp1[30];
        Unsafe.Add(ref output, 16) = temp1[1];
        Unsafe.Add(ref output, 17) = temp1[17];
        Unsafe.Add(ref output, 18) = temp1[9];
        Unsafe.Add(ref output, 19) = temp1[25];
        Unsafe.Add(ref output, 20) = temp1[5];
        Unsafe.Add(ref output, 21) = temp1[21];
        Unsafe.Add(ref output, 22) = temp1[13];
        Unsafe.Add(ref output, 23) = temp1[29];
        Unsafe.Add(ref output, 24) = temp1[3];
        Unsafe.Add(ref output, 25) = temp1[19];
        Unsafe.Add(ref output, 26) = temp1[11];
        Unsafe.Add(ref output, 27) = temp1[27];
        Unsafe.Add(ref output, 28) = temp1[7];
        Unsafe.Add(ref output, 29) = temp1[23];
        Unsafe.Add(ref output, 30) = temp1[15];
        Unsafe.Add(ref output, 31) = temp1[31];
    }
}

10
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct4Forward1dTransformer.cs

@ -33,10 +33,10 @@ internal class Av1Dct4Forward1dTransformer : IAv1Forward1dTransformer
output3 = -Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 0);
// stage 2
step0 = HalfBtf(cospi[32], output, cospi[32], output1, cosBit);
step1 = HalfBtf(-cospi[32], output1, cospi[32], output, cosBit);
step2 = HalfBtf(cospi[48], output2, cospi[16], output3, cosBit);
step3 = HalfBtf(cospi[48], output3, -cospi[16], output2, cosBit);
step0 = HalfButterfly(cospi[32], output, cospi[32], output1, cosBit);
step1 = HalfButterfly(-cospi[32], output1, cospi[32], output, cosBit);
step2 = HalfButterfly(cospi[48], output2, cospi[16], output3, cosBit);
step3 = HalfButterfly(cospi[48], output3, -cospi[16], output2, cosBit);
// stage 3
output = step0;
@ -45,7 +45,7 @@ internal class Av1Dct4Forward1dTransformer : IAv1Forward1dTransformer
output3 = step3;
}
private static int HalfBtf(int w0, int in0, int w1, int in1, int bit)
internal static int HalfButterfly(int w0, int in0, int w1, int in1, int bit)
{
long result64 = (long)(w0 * in0) + (w1 * in1);
long intermediate = result64 + (1L << (bit - 1));

739
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct64Forward1dTransformer.cs

@ -1,10 +1,747 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
internal class Av1Dct64Forward1dTransformer : IAv1Forward1dTransformer
{
/// <summary>
/// Runs the 64-point forward transform by forwarding <paramref name="input"/>,
/// <paramref name="output"/> and <paramref name="cosBit"/> to the scalar implementation.
/// </summary>
/// <param name="input">Reference to the first of the input values.</param>
/// <param name="output">Reference to the first of the output values.</param>
/// <param name="cosBit">Passed through unchanged to the scalar implementation.</param>
/// <param name="stageRange">Not used by this implementation; it is not forwarded.</param>
// NOTE(review): two expression bodies follow because this text is a diff view; the
// NotImplementedException line appears to be the replaced body and the TransforScalar
// line its replacement - confirm against the committed file.
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
=> throw new NotImplementedException();
=> TransforScalar(ref input, ref output, cosBit);
private static void TransforScalar(ref int input, ref int output, int cosBit)
{
Span<int> temp0 = stackalloc int[64];
Span<int> temp1 = stackalloc int[64];
// stage 0
// stage 1
temp0[0] = Unsafe.Add(ref input, 0) + Unsafe.Add(ref input, 63);
temp0[1] = Unsafe.Add(ref input, 1) + Unsafe.Add(ref input, 62);
temp0[2] = Unsafe.Add(ref input, 2) + Unsafe.Add(ref input, 61);
temp0[3] = Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 60);
temp0[4] = Unsafe.Add(ref input, 4) + Unsafe.Add(ref input, 59);
temp0[5] = Unsafe.Add(ref input, 5) + Unsafe.Add(ref input, 58);
temp0[6] = Unsafe.Add(ref input, 6) + Unsafe.Add(ref input, 57);
temp0[7] = Unsafe.Add(ref input, 7) + Unsafe.Add(ref input, 56);
temp0[8] = Unsafe.Add(ref input, 8) + Unsafe.Add(ref input, 55);
temp0[9] = Unsafe.Add(ref input, 9) + Unsafe.Add(ref input, 54);
temp0[10] = Unsafe.Add(ref input, 10) + Unsafe.Add(ref input, 53);
temp0[11] = Unsafe.Add(ref input, 11) + Unsafe.Add(ref input, 52);
temp0[12] = Unsafe.Add(ref input, 12) + Unsafe.Add(ref input, 51);
temp0[13] = Unsafe.Add(ref input, 13) + Unsafe.Add(ref input, 50);
temp0[14] = Unsafe.Add(ref input, 14) + Unsafe.Add(ref input, 49);
temp0[15] = Unsafe.Add(ref input, 15) + Unsafe.Add(ref input, 48);
temp0[16] = Unsafe.Add(ref input, 16) + Unsafe.Add(ref input, 47);
temp0[17] = Unsafe.Add(ref input, 17) + Unsafe.Add(ref input, 46);
temp0[18] = Unsafe.Add(ref input, 18) + Unsafe.Add(ref input, 45);
temp0[19] = Unsafe.Add(ref input, 19) + Unsafe.Add(ref input, 44);
temp0[20] = Unsafe.Add(ref input, 20) + Unsafe.Add(ref input, 43);
temp0[21] = Unsafe.Add(ref input, 21) + Unsafe.Add(ref input, 42);
temp0[22] = Unsafe.Add(ref input, 22) + Unsafe.Add(ref input, 41);
temp0[23] = Unsafe.Add(ref input, 23) + Unsafe.Add(ref input, 40);
temp0[24] = Unsafe.Add(ref input, 24) + Unsafe.Add(ref input, 39);
temp0[25] = Unsafe.Add(ref input, 25) + Unsafe.Add(ref input, 38);
temp0[26] = Unsafe.Add(ref input, 26) + Unsafe.Add(ref input, 37);
temp0[27] = Unsafe.Add(ref input, 27) + Unsafe.Add(ref input, 36);
temp0[28] = Unsafe.Add(ref input, 28) + Unsafe.Add(ref input, 35);
temp0[29] = Unsafe.Add(ref input, 29) + Unsafe.Add(ref input, 34);
temp0[30] = Unsafe.Add(ref input, 30) + Unsafe.Add(ref input, 33);
temp0[31] = Unsafe.Add(ref input, 31) + Unsafe.Add(ref input, 32);
temp0[32] = -Unsafe.Add(ref input, 32) + Unsafe.Add(ref input, 31);
temp0[33] = -Unsafe.Add(ref input, 33) + Unsafe.Add(ref input, 30);
temp0[34] = -Unsafe.Add(ref input, 34) + Unsafe.Add(ref input, 29);
temp0[35] = -Unsafe.Add(ref input, 35) + Unsafe.Add(ref input, 28);
temp0[36] = -Unsafe.Add(ref input, 36) + Unsafe.Add(ref input, 27);
temp0[37] = -Unsafe.Add(ref input, 37) + Unsafe.Add(ref input, 26);
temp0[38] = -Unsafe.Add(ref input, 38) + Unsafe.Add(ref input, 25);
temp0[39] = -Unsafe.Add(ref input, 39) + Unsafe.Add(ref input, 24);
temp0[40] = -Unsafe.Add(ref input, 40) + Unsafe.Add(ref input, 23);
temp0[41] = -Unsafe.Add(ref input, 41) + Unsafe.Add(ref input, 22);
temp0[42] = -Unsafe.Add(ref input, 42) + Unsafe.Add(ref input, 21);
temp0[43] = -Unsafe.Add(ref input, 43) + Unsafe.Add(ref input, 20);
temp0[44] = -Unsafe.Add(ref input, 44) + Unsafe.Add(ref input, 19);
temp0[45] = -Unsafe.Add(ref input, 45) + Unsafe.Add(ref input, 18);
temp0[46] = -Unsafe.Add(ref input, 46) + Unsafe.Add(ref input, 17);
temp0[47] = -Unsafe.Add(ref input, 47) + Unsafe.Add(ref input, 16);
temp0[48] = -Unsafe.Add(ref input, 48) + Unsafe.Add(ref input, 15);
temp0[49] = -Unsafe.Add(ref input, 49) + Unsafe.Add(ref input, 14);
temp0[50] = -Unsafe.Add(ref input, 50) + Unsafe.Add(ref input, 13);
temp0[51] = -Unsafe.Add(ref input, 51) + Unsafe.Add(ref input, 12);
temp0[52] = -Unsafe.Add(ref input, 52) + Unsafe.Add(ref input, 11);
temp0[53] = -Unsafe.Add(ref input, 53) + Unsafe.Add(ref input, 10);
temp0[54] = -Unsafe.Add(ref input, 54) + Unsafe.Add(ref input, 9);
temp0[55] = -Unsafe.Add(ref input, 55) + Unsafe.Add(ref input, 8);
temp0[56] = -Unsafe.Add(ref input, 56) + Unsafe.Add(ref input, 7);
temp0[57] = -Unsafe.Add(ref input, 57) + Unsafe.Add(ref input, 6);
temp0[58] = -Unsafe.Add(ref input, 58) + Unsafe.Add(ref input, 5);
temp0[59] = -Unsafe.Add(ref input, 59) + Unsafe.Add(ref input, 4);
temp0[60] = -Unsafe.Add(ref input, 60) + Unsafe.Add(ref input, 3);
temp0[61] = -Unsafe.Add(ref input, 61) + Unsafe.Add(ref input, 2);
temp0[62] = -Unsafe.Add(ref input, 62) + Unsafe.Add(ref input, 1);
temp0[63] = -Unsafe.Add(ref input, 63) + Unsafe.Add(ref input, 0);
// stage 2
Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
temp1[0] = temp0[0] + temp0[31];
temp1[1] = temp0[1] + temp0[30];
temp1[2] = temp0[2] + temp0[29];
temp1[3] = temp0[3] + temp0[28];
temp1[4] = temp0[4] + temp0[27];
temp1[5] = temp0[5] + temp0[26];
temp1[6] = temp0[6] + temp0[25];
temp1[7] = temp0[7] + temp0[24];
temp1[8] = temp0[8] + temp0[23];
temp1[9] = temp0[9] + temp0[22];
temp1[10] = temp0[10] + temp0[21];
temp1[11] = temp0[11] + temp0[20];
temp1[12] = temp0[12] + temp0[19];
temp1[13] = temp0[13] + temp0[18];
temp1[14] = temp0[14] + temp0[17];
temp1[15] = temp0[15] + temp0[16];
temp1[16] = -temp0[16] + temp0[15];
temp1[17] = -temp0[17] + temp0[14];
temp1[18] = -temp0[18] + temp0[13];
temp1[19] = -temp0[19] + temp0[12];
temp1[20] = -temp0[20] + temp0[11];
temp1[21] = -temp0[21] + temp0[10];
temp1[22] = -temp0[22] + temp0[9];
temp1[23] = -temp0[23] + temp0[8];
temp1[24] = -temp0[24] + temp0[7];
temp1[25] = -temp0[25] + temp0[6];
temp1[26] = -temp0[26] + temp0[5];
temp1[27] = -temp0[27] + temp0[4];
temp1[28] = -temp0[28] + temp0[3];
temp1[29] = -temp0[29] + temp0[2];
temp1[30] = -temp0[30] + temp0[1];
temp1[31] = -temp0[31] + temp0[0];
temp1[32] = temp0[32];
temp1[33] = temp0[33];
temp1[34] = temp0[34];
temp1[35] = temp0[35];
temp1[36] = temp0[36];
temp1[37] = temp0[37];
temp1[38] = temp0[38];
temp1[39] = temp0[39];
temp1[40] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[40], cospi[32], temp0[55], cosBit);
temp1[41] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[41], cospi[32], temp0[54], cosBit);
temp1[42] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[42], cospi[32], temp0[53], cosBit);
temp1[43] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[43], cospi[32], temp0[52], cosBit);
temp1[44] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[44], cospi[32], temp0[51], cosBit);
temp1[45] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[45], cospi[32], temp0[50], cosBit);
temp1[46] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[46], cospi[32], temp0[49], cosBit);
temp1[47] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[47], cospi[32], temp0[48], cosBit);
temp1[48] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[48], cospi[32], temp0[47], cosBit);
temp1[49] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[49], cospi[32], temp0[46], cosBit);
temp1[50] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[50], cospi[32], temp0[45], cosBit);
temp1[51] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[51], cospi[32], temp0[44], cosBit);
temp1[52] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[52], cospi[32], temp0[43], cosBit);
temp1[53] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[53], cospi[32], temp0[42], cosBit);
temp1[54] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[54], cospi[32], temp0[41], cosBit);
temp1[55] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[55], cospi[32], temp0[40], cosBit);
temp1[56] = temp0[56];
temp1[57] = temp0[57];
temp1[58] = temp0[58];
temp1[59] = temp0[59];
temp1[60] = temp0[60];
temp1[61] = temp0[61];
temp1[62] = temp0[62];
temp1[63] = temp0[63];
// stage 3
temp0[0] = temp1[0] + temp1[15];
temp0[1] = temp1[1] + temp1[14];
temp0[2] = temp1[2] + temp1[13];
temp0[3] = temp1[3] + temp1[12];
temp0[4] = temp1[4] + temp1[11];
temp0[5] = temp1[5] + temp1[10];
temp0[6] = temp1[6] + temp1[9];
temp0[7] = temp1[7] + temp1[8];
temp0[8] = -temp1[8] + temp1[7];
temp0[9] = -temp1[9] + temp1[6];
temp0[10] = -temp1[10] + temp1[5];
temp0[11] = -temp1[11] + temp1[4];
temp0[12] = -temp1[12] + temp1[3];
temp0[13] = -temp1[13] + temp1[2];
temp0[14] = -temp1[14] + temp1[1];
temp0[15] = -temp1[15] + temp1[0];
temp0[16] = temp1[16];
temp0[17] = temp1[17];
temp0[18] = temp1[18];
temp0[19] = temp1[19];
temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[20], cospi[32], temp1[27], cosBit);
temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[21], cospi[32], temp1[26], cosBit);
temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[22], cospi[32], temp1[25], cosBit);
temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[23], cospi[32], temp1[24], cosBit);
temp0[24] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[24], cospi[32], temp1[23], cosBit);
temp0[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[25], cospi[32], temp1[22], cosBit);
temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[26], cospi[32], temp1[21], cosBit);
temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[27], cospi[32], temp1[20], cosBit);
temp0[28] = temp1[28];
temp0[29] = temp1[29];
temp0[30] = temp1[30];
temp0[31] = temp1[31];
temp0[32] = temp1[32] + temp1[47];
temp0[33] = temp1[33] + temp1[46];
temp0[34] = temp1[34] + temp1[45];
temp0[35] = temp1[35] + temp1[44];
temp0[36] = temp1[36] + temp1[43];
temp0[37] = temp1[37] + temp1[42];
temp0[38] = temp1[38] + temp1[41];
temp0[39] = temp1[39] + temp1[40];
temp0[40] = -temp1[40] + temp1[39];
temp0[41] = -temp1[41] + temp1[38];
temp0[42] = -temp1[42] + temp1[37];
temp0[43] = -temp1[43] + temp1[36];
temp0[44] = -temp1[44] + temp1[35];
temp0[45] = -temp1[45] + temp1[34];
temp0[46] = -temp1[46] + temp1[33];
temp0[47] = -temp1[47] + temp1[32];
temp0[48] = -temp1[48] + temp1[63];
temp0[49] = -temp1[49] + temp1[62];
temp0[50] = -temp1[50] + temp1[61];
temp0[51] = -temp1[51] + temp1[60];
temp0[52] = -temp1[52] + temp1[59];
temp0[53] = -temp1[53] + temp1[58];
temp0[54] = -temp1[54] + temp1[57];
temp0[55] = -temp1[55] + temp1[56];
temp0[56] = temp1[56] + temp1[55];
temp0[57] = temp1[57] + temp1[54];
temp0[58] = temp1[58] + temp1[53];
temp0[59] = temp1[59] + temp1[52];
temp0[60] = temp1[60] + temp1[51];
temp0[61] = temp1[61] + temp1[50];
temp0[62] = temp1[62] + temp1[49];
temp0[63] = temp1[63] + temp1[48];
// stage 4
temp1[0] = temp0[0] + temp0[7];
temp1[1] = temp0[1] + temp0[6];
temp1[2] = temp0[2] + temp0[5];
temp1[3] = temp0[3] + temp0[4];
temp1[4] = -temp0[4] + temp0[3];
temp1[5] = -temp0[5] + temp0[2];
temp1[6] = -temp0[6] + temp0[1];
temp1[7] = -temp0[7] + temp0[0];
temp1[8] = temp0[8];
temp1[9] = temp0[9];
temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[10], cospi[32], temp0[13], cosBit);
temp1[11] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[11], cospi[32], temp0[12], cosBit);
temp1[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[12], cospi[32], temp0[11], cosBit);
temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[13], cospi[32], temp0[10], cosBit);
temp1[14] = temp0[14];
temp1[15] = temp0[15];
temp1[16] = temp0[16] + temp0[23];
temp1[17] = temp0[17] + temp0[22];
temp1[18] = temp0[18] + temp0[21];
temp1[19] = temp0[19] + temp0[20];
temp1[20] = -temp0[20] + temp0[19];
temp1[21] = -temp0[21] + temp0[18];
temp1[22] = -temp0[22] + temp0[17];
temp1[23] = -temp0[23] + temp0[16];
temp1[24] = -temp0[24] + temp0[31];
temp1[25] = -temp0[25] + temp0[30];
temp1[26] = -temp0[26] + temp0[29];
temp1[27] = -temp0[27] + temp0[28];
temp1[28] = temp0[28] + temp0[27];
temp1[29] = temp0[29] + temp0[26];
temp1[30] = temp0[30] + temp0[25];
temp1[31] = temp0[31] + temp0[24];
temp1[32] = temp0[32];
temp1[33] = temp0[33];
temp1[34] = temp0[34];
temp1[35] = temp0[35];
temp1[36] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[36], cospi[48], temp0[59], cosBit);
temp1[37] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[37], cospi[48], temp0[58], cosBit);
temp1[38] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[38], cospi[48], temp0[57], cosBit);
temp1[39] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[39], cospi[48], temp0[56], cosBit);
temp1[40] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[40], -cospi[16], temp0[55], cosBit);
temp1[41] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[41], -cospi[16], temp0[54], cosBit);
temp1[42] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[42], -cospi[16], temp0[53], cosBit);
temp1[43] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[43], -cospi[16], temp0[52], cosBit);
temp1[44] = temp0[44];
temp1[45] = temp0[45];
temp1[46] = temp0[46];
temp1[47] = temp0[47];
temp1[48] = temp0[48];
temp1[49] = temp0[49];
temp1[50] = temp0[50];
temp1[51] = temp0[51];
temp1[52] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[52], -cospi[16], temp0[43], cosBit);
temp1[53] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[53], -cospi[16], temp0[42], cosBit);
temp1[54] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[54], -cospi[16], temp0[41], cosBit);
temp1[55] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[55], -cospi[16], temp0[40], cosBit);
temp1[56] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[56], cospi[48], temp0[39], cosBit);
temp1[57] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[57], cospi[48], temp0[38], cosBit);
temp1[58] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[58], cospi[48], temp0[37], cosBit);
temp1[59] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[59], cospi[48], temp0[36], cosBit);
temp1[60] = temp0[60];
temp1[61] = temp0[61];
temp1[62] = temp0[62];
temp1[63] = temp0[63];
// stage 5
temp0[0] = temp1[0] + temp1[3];
temp0[1] = temp1[1] + temp1[2];
temp0[2] = -temp1[2] + temp1[1];
temp0[3] = -temp1[3] + temp1[0];
temp0[4] = temp1[4];
temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp1[5], cospi[32], temp1[6], cosBit);
temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp1[6], cospi[32], temp1[5], cosBit);
temp0[7] = temp1[7];
temp0[8] = temp1[8] + temp1[11];
temp0[9] = temp1[9] + temp1[10];
temp0[10] = -temp1[10] + temp1[9];
temp0[11] = -temp1[11] + temp1[8];
temp0[12] = -temp1[12] + temp1[15];
temp0[13] = -temp1[13] + temp1[14];
temp0[14] = temp1[14] + temp1[13];
temp0[15] = temp1[15] + temp1[12];
temp0[16] = temp1[16];
temp0[17] = temp1[17];
temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[18], cospi[48], temp1[29], cosBit);
temp0[19] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp1[19], cospi[48], temp1[28], cosBit);
temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[20], -cospi[16], temp1[27], cosBit);
temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp1[21], -cospi[16], temp1[26], cosBit);
temp0[22] = temp1[22];
temp0[23] = temp1[23];
temp0[24] = temp1[24];
temp0[25] = temp1[25];
temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[26], -cospi[16], temp1[21], cosBit);
temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp1[27], -cospi[16], temp1[20], cosBit);
temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[28], cospi[48], temp1[19], cosBit);
temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp1[29], cospi[48], temp1[18], cosBit);
temp0[30] = temp1[30];
temp0[31] = temp1[31];
temp0[32] = temp1[32] + temp1[39];
temp0[33] = temp1[33] + temp1[38];
temp0[34] = temp1[34] + temp1[37];
temp0[35] = temp1[35] + temp1[36];
temp0[36] = -temp1[36] + temp1[35];
temp0[37] = -temp1[37] + temp1[34];
temp0[38] = -temp1[38] + temp1[33];
temp0[39] = -temp1[39] + temp1[32];
temp0[40] = -temp1[40] + temp1[47];
temp0[41] = -temp1[41] + temp1[46];
temp0[42] = -temp1[42] + temp1[45];
temp0[43] = -temp1[43] + temp1[44];
temp0[44] = temp1[44] + temp1[43];
temp0[45] = temp1[45] + temp1[42];
temp0[46] = temp1[46] + temp1[41];
temp0[47] = temp1[47] + temp1[40];
temp0[48] = temp1[48] + temp1[55];
temp0[49] = temp1[49] + temp1[54];
temp0[50] = temp1[50] + temp1[53];
temp0[51] = temp1[51] + temp1[52];
temp0[52] = -temp1[52] + temp1[51];
temp0[53] = -temp1[53] + temp1[50];
temp0[54] = -temp1[54] + temp1[49];
temp0[55] = -temp1[55] + temp1[48];
temp0[56] = -temp1[56] + temp1[63];
temp0[57] = -temp1[57] + temp1[62];
temp0[58] = -temp1[58] + temp1[61];
temp0[59] = -temp1[59] + temp1[60];
temp0[60] = temp1[60] + temp1[59];
temp0[61] = temp1[61] + temp1[58];
temp0[62] = temp1[62] + temp1[57];
temp0[63] = temp1[63] + temp1[56];
// stage 6
temp1[0] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], temp0[0], cospi[32], temp0[1], cosBit);
temp1[1] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], temp0[1], cospi[32], temp0[0], cosBit);
temp1[2] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[2], cospi[16], temp0[3], cosBit);
temp1[3] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[3], -cospi[16], temp0[2], cosBit);
temp1[4] = temp0[4] + temp0[5];
temp1[5] = -temp0[5] + temp0[4];
temp1[6] = -temp0[6] + temp0[7];
temp1[7] = temp0[7] + temp0[6];
temp1[8] = temp0[8];
temp1[9] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[16], temp0[9], cospi[48], temp0[14], cosBit);
temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[48], temp0[10], -cospi[16], temp0[13], cosBit);
temp1[11] = temp0[11];
temp1[12] = temp0[12];
temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], temp0[13], -cospi[16], temp0[10], cosBit);
temp1[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[16], temp0[14], cospi[48], temp0[9], cosBit);
temp1[15] = temp0[15];
temp1[16] = temp0[16] + temp0[19];
temp1[17] = temp0[17] + temp0[18];
temp1[18] = -temp0[18] + temp0[17];
temp1[19] = -temp0[19] + temp0[16];
temp1[20] = -temp0[20] + temp0[23];
temp1[21] = -temp0[21] + temp0[22];
temp1[22] = temp0[22] + temp0[21];
temp1[23] = temp0[23] + temp0[20];
temp1[24] = temp0[24] + temp0[27];
temp1[25] = temp0[25] + temp0[26];
temp1[26] = -temp0[26] + temp0[25];
temp1[27] = -temp0[27] + temp0[24];
temp1[28] = -temp0[28] + temp0[31];
temp1[29] = -temp0[29] + temp0[30];
temp1[30] = temp0[30] + temp0[29];
temp1[31] = temp0[31] + temp0[28];
temp1[32] = temp0[32];
temp1[33] = temp0[33];
temp1[34] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp0[34], cospi[56], temp0[61], cosBit);
temp1[35] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp0[35], cospi[56], temp0[60], cosBit);
temp1[36] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp0[36], -cospi[8], temp0[59], cosBit);
temp1[37] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp0[37], -cospi[8], temp0[58], cosBit);
temp1[38] = temp0[38];
temp1[39] = temp0[39];
temp1[40] = temp0[40];
temp1[41] = temp0[41];
temp1[42] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp0[42], cospi[24], temp0[53], cosBit);
temp1[43] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp0[43], cospi[24], temp0[52], cosBit);
temp1[44] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp0[44], -cospi[40], temp0[51], cosBit);
temp1[45] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp0[45], -cospi[40], temp0[50], cosBit);
temp1[46] = temp0[46];
temp1[47] = temp0[47];
temp1[48] = temp0[48];
temp1[49] = temp0[49];
temp1[50] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[50], -cospi[40], temp0[45], cosBit);
temp1[51] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp0[51], -cospi[40], temp0[44], cosBit);
temp1[52] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp0[52], cospi[24], temp0[43], cosBit);
temp1[53] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp0[53], cospi[24], temp0[42], cosBit);
temp1[54] = temp0[54];
temp1[55] = temp0[55];
temp1[56] = temp0[56];
temp1[57] = temp0[57];
temp1[58] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[58], -cospi[8], temp0[37], cosBit);
temp1[59] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp0[59], -cospi[8], temp0[36], cosBit);
temp1[60] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp0[60], cospi[56], temp0[35], cosBit);
temp1[61] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp0[61], cospi[56], temp0[34], cosBit);
temp1[62] = temp0[62];
temp1[63] = temp0[63];
// stage 7
temp0[0] = temp1[0];
temp0[1] = temp1[1];
temp0[2] = temp1[2];
temp0[3] = temp1[3];
temp0[4] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[4], cospi[8], temp1[7], cosBit);
temp0[5] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[5], cospi[40], temp1[6], cosBit);
temp0[6] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[6], -cospi[40], temp1[5], cosBit);
temp0[7] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[7], -cospi[8], temp1[4], cosBit);
temp0[8] = temp1[8] + temp1[9];
temp0[9] = -temp1[9] + temp1[8];
temp0[10] = -temp1[10] + temp1[11];
temp0[11] = temp1[11] + temp1[10];
temp0[12] = temp1[12] + temp1[13];
temp0[13] = -temp1[13] + temp1[12];
temp0[14] = -temp1[14] + temp1[15];
temp0[15] = temp1[15] + temp1[14];
temp0[16] = temp1[16];
temp0[17] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[8], temp1[17], cospi[56], temp1[30], cosBit);
temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[56], temp1[18], -cospi[8], temp1[29], cosBit);
temp0[19] = temp1[19];
temp0[20] = temp1[20];
temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[40], temp1[21], cospi[24], temp1[26], cosBit);
temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[24], temp1[22], -cospi[40], temp1[25], cosBit);
temp0[23] = temp1[23];
temp0[24] = temp1[24];
temp0[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], temp1[25], -cospi[40], temp1[22], cosBit);
temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[40], temp1[26], cospi[24], temp1[21], cosBit);
temp0[27] = temp1[27];
temp0[28] = temp1[28];
temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], temp1[29], -cospi[8], temp1[18], cosBit);
temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[8], temp1[30], cospi[56], temp1[17], cosBit);
temp0[31] = temp1[31];
temp0[32] = temp1[32] + temp1[35];
temp0[33] = temp1[33] + temp1[34];
temp0[34] = -temp1[34] + temp1[33];
temp0[35] = -temp1[35] + temp1[32];
temp0[36] = -temp1[36] + temp1[39];
temp0[37] = -temp1[37] + temp1[38];
temp0[38] = temp1[38] + temp1[37];
temp0[39] = temp1[39] + temp1[36];
temp0[40] = temp1[40] + temp1[43];
temp0[41] = temp1[41] + temp1[42];
temp0[42] = -temp1[42] + temp1[41];
temp0[43] = -temp1[43] + temp1[40];
temp0[44] = -temp1[44] + temp1[47];
temp0[45] = -temp1[45] + temp1[46];
temp0[46] = temp1[46] + temp1[45];
temp0[47] = temp1[47] + temp1[44];
temp0[48] = temp1[48] + temp1[51];
temp0[49] = temp1[49] + temp1[50];
temp0[50] = -temp1[50] + temp1[49];
temp0[51] = -temp1[51] + temp1[48];
temp0[52] = -temp1[52] + temp1[55];
temp0[53] = -temp1[53] + temp1[54];
temp0[54] = temp1[54] + temp1[53];
temp0[55] = temp1[55] + temp1[52];
temp0[56] = temp1[56] + temp1[59];
temp0[57] = temp1[57] + temp1[58];
temp0[58] = -temp1[58] + temp1[57];
temp0[59] = -temp1[59] + temp1[56];
temp0[60] = -temp1[60] + temp1[63];
temp0[61] = -temp1[61] + temp1[62];
temp0[62] = temp1[62] + temp1[61];
temp0[63] = temp1[63] + temp1[60];
// stage 8
temp1[0] = temp0[0];
temp1[1] = temp0[1];
temp1[2] = temp0[2];
temp1[3] = temp0[3];
temp1[4] = temp0[4];
temp1[5] = temp0[5];
temp1[6] = temp0[6];
temp1[7] = temp0[7];
temp1[8] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[8], cospi[4], temp0[15], cosBit);
temp1[9] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[9], cospi[36], temp0[14], cosBit);
temp1[10] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[10], cospi[20], temp0[13], cosBit);
temp1[11] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[11], cospi[52], temp0[12], cosBit);
temp1[12] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[12], -cospi[52], temp0[11], cosBit);
temp1[13] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[13], -cospi[20], temp0[10], cosBit);
temp1[14] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[14], -cospi[36], temp0[9], cosBit);
temp1[15] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[15], -cospi[4], temp0[8], cosBit);
temp1[16] = temp0[16] + temp0[17];
temp1[17] = -temp0[17] + temp0[16];
temp1[18] = -temp0[18] + temp0[19];
temp1[19] = temp0[19] + temp0[18];
temp1[20] = temp0[20] + temp0[21];
temp1[21] = -temp0[21] + temp0[20];
temp1[22] = -temp0[22] + temp0[23];
temp1[23] = temp0[23] + temp0[22];
temp1[24] = temp0[24] + temp0[25];
temp1[25] = -temp0[25] + temp0[24];
temp1[26] = -temp0[26] + temp0[27];
temp1[27] = temp0[27] + temp0[26];
temp1[28] = temp0[28] + temp0[29];
temp1[29] = -temp0[29] + temp0[28];
temp1[30] = -temp0[30] + temp0[31];
temp1[31] = temp0[31] + temp0[30];
temp1[32] = temp0[32];
temp1[33] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[4], temp0[33], cospi[60], temp0[62], cosBit);
temp1[34] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[60], temp0[34], -cospi[4], temp0[61], cosBit);
temp1[35] = temp0[35];
temp1[36] = temp0[36];
temp1[37] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[36], temp0[37], cospi[28], temp0[58], cosBit);
temp1[38] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[28], temp0[38], -cospi[36], temp0[57], cosBit);
temp1[39] = temp0[39];
temp1[40] = temp0[40];
temp1[41] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[20], temp0[41], cospi[44], temp0[54], cosBit);
temp1[42] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[44], temp0[42], -cospi[20], temp0[53], cosBit);
temp1[43] = temp0[43];
temp1[44] = temp0[44];
temp1[45] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[52], temp0[45], cospi[12], temp0[50], cosBit);
temp1[46] = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[12], temp0[46], -cospi[52], temp0[49], cosBit);
temp1[47] = temp0[47];
temp1[48] = temp0[48];
temp1[49] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[12], temp0[49], -cospi[52], temp0[46], cosBit);
temp1[50] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[52], temp0[50], cospi[12], temp0[45], cosBit);
temp1[51] = temp0[51];
temp1[52] = temp0[52];
temp1[53] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[44], temp0[53], -cospi[20], temp0[42], cosBit);
temp1[54] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[20], temp0[54], cospi[44], temp0[41], cosBit);
temp1[55] = temp0[55];
temp1[56] = temp0[56];
temp1[57] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[28], temp0[57], -cospi[36], temp0[38], cosBit);
temp1[58] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[36], temp0[58], cospi[28], temp0[37], cosBit);
temp1[59] = temp0[59];
temp1[60] = temp0[60];
temp1[61] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[60], temp0[61], -cospi[4], temp0[34], cosBit);
temp1[62] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[4], temp0[62], cospi[60], temp0[33], cosBit);
temp1[63] = temp0[63];
// stage 9
temp0[0] = temp1[0];
temp0[1] = temp1[1];
temp0[2] = temp1[2];
temp0[3] = temp1[3];
temp0[4] = temp1[4];
temp0[5] = temp1[5];
temp0[6] = temp1[6];
temp0[7] = temp1[7];
temp0[8] = temp1[8];
temp0[9] = temp1[9];
temp0[10] = temp1[10];
temp0[11] = temp1[11];
temp0[12] = temp1[12];
temp0[13] = temp1[13];
temp0[14] = temp1[14];
temp0[15] = temp1[15];
temp0[16] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[62], temp1[16], cospi[2], temp1[31], cosBit);
temp0[17] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[30], temp1[17], cospi[34], temp1[30], cosBit);
temp0[18] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[46], temp1[18], cospi[18], temp1[29], cosBit);
temp0[19] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[14], temp1[19], cospi[50], temp1[28], cosBit);
temp0[20] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[54], temp1[20], cospi[10], temp1[27], cosBit);
temp0[21] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[22], temp1[21], cospi[42], temp1[26], cosBit);
temp0[22] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[38], temp1[22], cospi[26], temp1[25], cosBit);
temp0[23] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[6], temp1[23], cospi[58], temp1[24], cosBit);
temp0[24] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[6], temp1[24], -cospi[58], temp1[23], cosBit);
temp0[25] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[38], temp1[25], -cospi[26], temp1[22], cosBit);
temp0[26] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[22], temp1[26], -cospi[42], temp1[21], cosBit);
temp0[27] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[54], temp1[27], -cospi[10], temp1[20], cosBit);
temp0[28] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[14], temp1[28], -cospi[50], temp1[19], cosBit);
temp0[29] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[46], temp1[29], -cospi[18], temp1[18], cosBit);
temp0[30] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[30], temp1[30], -cospi[34], temp1[17], cosBit);
temp0[31] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[62], temp1[31], -cospi[2], temp1[16], cosBit);
temp0[32] = temp1[32] + temp1[33];
temp0[33] = -temp1[33] + temp1[32];
temp0[34] = -temp1[34] + temp1[35];
temp0[35] = temp1[35] + temp1[34];
temp0[36] = temp1[36] + temp1[37];
temp0[37] = -temp1[37] + temp1[36];
temp0[38] = -temp1[38] + temp1[39];
temp0[39] = temp1[39] + temp1[38];
temp0[40] = temp1[40] + temp1[41];
temp0[41] = -temp1[41] + temp1[40];
temp0[42] = -temp1[42] + temp1[43];
temp0[43] = temp1[43] + temp1[42];
temp0[44] = temp1[44] + temp1[45];
temp0[45] = -temp1[45] + temp1[44];
temp0[46] = -temp1[46] + temp1[47];
temp0[47] = temp1[47] + temp1[46];
temp0[48] = temp1[48] + temp1[49];
temp0[49] = -temp1[49] + temp1[48];
temp0[50] = -temp1[50] + temp1[51];
temp0[51] = temp1[51] + temp1[50];
temp0[52] = temp1[52] + temp1[53];
temp0[53] = -temp1[53] + temp1[52];
temp0[54] = -temp1[54] + temp1[55];
temp0[55] = temp1[55] + temp1[54];
temp0[56] = temp1[56] + temp1[57];
temp0[57] = -temp1[57] + temp1[56];
temp0[58] = -temp1[58] + temp1[59];
temp0[59] = temp1[59] + temp1[58];
temp0[60] = temp1[60] + temp1[61];
temp0[61] = -temp1[61] + temp1[60];
temp0[62] = -temp1[62] + temp1[63];
temp0[63] = temp1[63] + temp1[62];
// stage 10
temp1[0] = temp0[0];
temp1[1] = temp0[1];
temp1[2] = temp0[2];
temp1[3] = temp0[3];
temp1[4] = temp0[4];
temp1[5] = temp0[5];
temp1[6] = temp0[6];
temp1[7] = temp0[7];
temp1[8] = temp0[8];
temp1[9] = temp0[9];
temp1[10] = temp0[10];
temp1[11] = temp0[11];
temp1[12] = temp0[12];
temp1[13] = temp0[13];
temp1[14] = temp0[14];
temp1[15] = temp0[15];
temp1[16] = temp0[16];
temp1[17] = temp0[17];
temp1[18] = temp0[18];
temp1[19] = temp0[19];
temp1[20] = temp0[20];
temp1[21] = temp0[21];
temp1[22] = temp0[22];
temp1[23] = temp0[23];
temp1[24] = temp0[24];
temp1[25] = temp0[25];
temp1[26] = temp0[26];
temp1[27] = temp0[27];
temp1[28] = temp0[28];
temp1[29] = temp0[29];
temp1[30] = temp0[30];
temp1[31] = temp0[31];
temp1[32] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[63], temp0[32], cospi[1], temp0[63], cosBit);
temp1[33] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[31], temp0[33], cospi[33], temp0[62], cosBit);
temp1[34] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[47], temp0[34], cospi[17], temp0[61], cosBit);
temp1[35] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[15], temp0[35], cospi[49], temp0[60], cosBit);
temp1[36] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[55], temp0[36], cospi[9], temp0[59], cosBit);
temp1[37] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[23], temp0[37], cospi[41], temp0[58], cosBit);
temp1[38] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[39], temp0[38], cospi[25], temp0[57], cosBit);
temp1[39] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[7], temp0[39], cospi[57], temp0[56], cosBit);
temp1[40] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[59], temp0[40], cospi[5], temp0[55], cosBit);
temp1[41] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[27], temp0[41], cospi[37], temp0[54], cosBit);
temp1[42] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[43], temp0[42], cospi[21], temp0[53], cosBit);
temp1[43] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[11], temp0[43], cospi[53], temp0[52], cosBit);
temp1[44] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[51], temp0[44], cospi[13], temp0[51], cosBit);
temp1[45] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[19], temp0[45], cospi[45], temp0[50], cosBit);
temp1[46] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[35], temp0[46], cospi[29], temp0[49], cosBit);
temp1[47] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[3], temp0[47], cospi[61], temp0[48], cosBit);
temp1[48] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[3], temp0[48], -cospi[61], temp0[47], cosBit);
temp1[49] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[35], temp0[49], -cospi[29], temp0[46], cosBit);
temp1[50] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[19], temp0[50], -cospi[45], temp0[45], cosBit);
temp1[51] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[51], temp0[51], -cospi[13], temp0[44], cosBit);
temp1[52] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[11], temp0[52], -cospi[53], temp0[43], cosBit);
temp1[53] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[43], temp0[53], -cospi[21], temp0[42], cosBit);
temp1[54] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[27], temp0[54], -cospi[37], temp0[41], cosBit);
temp1[55] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[59], temp0[55], -cospi[5], temp0[40], cosBit);
temp1[56] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[7], temp0[56], -cospi[57], temp0[39], cosBit);
temp1[57] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[39], temp0[57], -cospi[25], temp0[38], cosBit);
temp1[58] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[23], temp0[58], -cospi[41], temp0[37], cosBit);
temp1[59] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[55], temp0[59], -cospi[9], temp0[36], cosBit);
temp1[60] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[15], temp0[60], -cospi[49], temp0[35], cosBit);
temp1[61] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[47], temp0[61], -cospi[17], temp0[34], cosBit);
temp1[62] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[31], temp0[62], -cospi[33], temp0[33], cosBit);
temp1[63] = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[63], temp0[63], -cospi[1], temp0[32], cosBit);
// stage 11
Unsafe.Add(ref output, 0) = temp1[0];
Unsafe.Add(ref output, 1) = temp1[32];
Unsafe.Add(ref output, 2) = temp1[16];
Unsafe.Add(ref output, 3) = temp1[48];
Unsafe.Add(ref output, 4) = temp1[8];
Unsafe.Add(ref output, 5) = temp1[40];
Unsafe.Add(ref output, 6) = temp1[24];
Unsafe.Add(ref output, 7) = temp1[56];
Unsafe.Add(ref output, 8) = temp1[4];
Unsafe.Add(ref output, 9) = temp1[36];
Unsafe.Add(ref output, 10) = temp1[20];
Unsafe.Add(ref output, 11) = temp1[52];
Unsafe.Add(ref output, 12) = temp1[12];
Unsafe.Add(ref output, 13) = temp1[44];
Unsafe.Add(ref output, 14) = temp1[28];
Unsafe.Add(ref output, 15) = temp1[60];
Unsafe.Add(ref output, 16) = temp1[2];
Unsafe.Add(ref output, 17) = temp1[34];
Unsafe.Add(ref output, 18) = temp1[18];
Unsafe.Add(ref output, 19) = temp1[50];
Unsafe.Add(ref output, 20) = temp1[10];
Unsafe.Add(ref output, 21) = temp1[42];
Unsafe.Add(ref output, 22) = temp1[26];
Unsafe.Add(ref output, 23) = temp1[58];
Unsafe.Add(ref output, 24) = temp1[6];
Unsafe.Add(ref output, 25) = temp1[38];
Unsafe.Add(ref output, 26) = temp1[22];
Unsafe.Add(ref output, 27) = temp1[54];
Unsafe.Add(ref output, 28) = temp1[14];
Unsafe.Add(ref output, 29) = temp1[46];
Unsafe.Add(ref output, 30) = temp1[30];
Unsafe.Add(ref output, 31) = temp1[62];
Unsafe.Add(ref output, 32) = temp1[1];
Unsafe.Add(ref output, 33) = temp1[33];
Unsafe.Add(ref output, 34) = temp1[17];
Unsafe.Add(ref output, 35) = temp1[49];
Unsafe.Add(ref output, 36) = temp1[9];
Unsafe.Add(ref output, 37) = temp1[41];
Unsafe.Add(ref output, 38) = temp1[25];
Unsafe.Add(ref output, 39) = temp1[57];
Unsafe.Add(ref output, 40) = temp1[5];
Unsafe.Add(ref output, 41) = temp1[37];
Unsafe.Add(ref output, 42) = temp1[21];
Unsafe.Add(ref output, 43) = temp1[53];
Unsafe.Add(ref output, 44) = temp1[13];
Unsafe.Add(ref output, 45) = temp1[45];
Unsafe.Add(ref output, 46) = temp1[29];
Unsafe.Add(ref output, 47) = temp1[61];
Unsafe.Add(ref output, 48) = temp1[3];
Unsafe.Add(ref output, 49) = temp1[35];
Unsafe.Add(ref output, 50) = temp1[19];
Unsafe.Add(ref output, 51) = temp1[51];
Unsafe.Add(ref output, 52) = temp1[11];
Unsafe.Add(ref output, 53) = temp1[43];
Unsafe.Add(ref output, 54) = temp1[27];
Unsafe.Add(ref output, 55) = temp1[59];
Unsafe.Add(ref output, 56) = temp1[7];
Unsafe.Add(ref output, 57) = temp1[39];
Unsafe.Add(ref output, 58) = temp1[23];
Unsafe.Add(ref output, 59) = temp1[55];
Unsafe.Add(ref output, 60) = temp1[15];
Unsafe.Add(ref output, 61) = temp1[47];
Unsafe.Add(ref output, 62) = temp1[31];
Unsafe.Add(ref output, 63) = temp1[63];
}
}

63
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct8Forward1dTransformer.cs

@ -1,10 +1,71 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
internal class Av1Dct8Forward1dTransformer : IAv1Forward1dTransformer
{
/// <summary>
/// Runs the forward 8-point DCT by handing the input and output references and
/// <paramref name="cosBit"/> to the scalar implementation.
/// </summary>
/// <param name="input">Reference to the first input coefficient.</param>
/// <param name="output">Reference to the first output coefficient.</param>
/// <param name="cosBit">Cosine bit precision, forwarded to the scalar implementation.</param>
/// <param name="stageRange">Not forwarded to the scalar implementation.</param>
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
=> throw new NotImplementedException();
=> TransformScalar(ref input, ref output, cosBit);
/// <summary>
/// Scalar forward 8-point DCT. Reads eight values starting at <paramref name="input"/>,
/// runs them through the staged butterfly network and writes eight values starting at
/// <paramref name="output"/>.
/// </summary>
/// <param name="input">Reference to the first of eight input values.</param>
/// <param name="output">Reference to the first of eight output values.</param>
/// <param name="cosBit">Selects the cosine table and is passed on to every half butterfly.</param>
private static void TransformScalar(ref int input, ref int output, int cosBit)
{
    // All inputs are read up front and outputs are only written at the very end.
    int in0 = Unsafe.Add(ref input, 0);
    int in1 = Unsafe.Add(ref input, 1);
    int in2 = Unsafe.Add(ref input, 2);
    int in3 = Unsafe.Add(ref input, 3);
    int in4 = Unsafe.Add(ref input, 4);
    int in5 = Unsafe.Add(ref input, 5);
    int in6 = Unsafe.Add(ref input, 6);
    int in7 = Unsafe.Add(ref input, 7);

    // stage 1: mirror sums in the lower half, mirror differences in the upper half.
    int a0 = in0 + in7;
    int a1 = in1 + in6;
    int a2 = in2 + in5;
    int a3 = in3 + in4;
    int a4 = -in4 + in3;
    int a5 = -in5 + in2;
    int a6 = -in6 + in1;
    int a7 = -in7 + in0;

    Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);

    // stage 2
    int b0 = a0 + a3;
    int b1 = a1 + a2;
    int b2 = -a2 + a1;
    int b3 = -a3 + a0;
    int b4 = a4;
    int b5 = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], a5, cospi[32], a6, cosBit);
    int b6 = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], a6, cospi[32], a5, cosBit);
    int b7 = a7;

    // stage 3: the lower four values are final after this stage.
    int c0 = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[32], b0, cospi[32], b1, cosBit);
    int c1 = Av1Dct4Forward1dTransformer.HalfButterfly(-cospi[32], b1, cospi[32], b0, cosBit);
    int c2 = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], b2, cospi[16], b3, cosBit);
    int c3 = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[48], b3, -cospi[16], b2, cosBit);
    int c4 = b4 + b5;
    int c5 = -b5 + b4;
    int c6 = -b6 + b7;
    int c7 = b7 + b6;

    // stage 4: only the upper four values are rotated; the lower four pass through.
    int d4 = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], c4, cospi[8], c7, cosBit);
    int d5 = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], c5, cospi[40], c6, cosBit);
    int d6 = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[24], c6, -cospi[40], c5, cosBit);
    int d7 = Av1Dct4Forward1dTransformer.HalfButterfly(cospi[56], c7, -cospi[8], c4, cosBit);

    // stage 5: write the results in permuted order (0, 4, 2, 6, 1, 5, 3, 7).
    Unsafe.Add(ref output, 0) = c0;
    Unsafe.Add(ref output, 1) = d4;
    Unsafe.Add(ref output, 2) = c2;
    Unsafe.Add(ref output, 3) = d6;
    Unsafe.Add(ref output, 4) = c1;
    Unsafe.Add(ref output, 5) = d5;
    Unsafe.Add(ref output, 6) = c3;
    Unsafe.Add(ref output, 7) = d7;
}
}

6
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Forward2dTransformerBase.cs

@ -7,8 +7,8 @@ namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
internal abstract class Av1Forward2dTransformerBase
{
private const int NewSqrt = 5793;
private const int NewSqrtBitCount = 12;
internal const int NewSqrt2 = 5793;
internal const int NewSqrt2BitCount = 12;
/// <summary>
/// SVT: av1_tranform_two_d_core_c
@ -108,7 +108,7 @@ internal abstract class Av1Forward2dTransformerBase
for (c = 0; c < transformColumnCount; ++c)
{
ref int current = ref Unsafe.Add(ref output, (r * transformColumnCount) + c);
current = Av1Math.RoundShift((long)current * NewSqrt, NewSqrtBitCount);
current = Av1Math.RoundShift((long)current * NewSqrt2, NewSqrt2BitCount);
}
}
}

26
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity16Forward1dTransformer.cs

@ -1,10 +1,34 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
internal class Av1Identity16Forward1dTransformer : IAv1Forward1dTransformer
{
private const int TwiceNewSqrt2 = 2 * 5793;
/// <summary>
/// Runs the forward 16-point identity transform by delegating to the scalar implementation.
/// </summary>
/// <param name="input">Reference to the first input coefficient.</param>
/// <param name="output">Reference to the first output coefficient.</param>
/// <param name="cosBit">Not forwarded to the scalar implementation.</param>
/// <param name="stageRange">Not forwarded to the scalar implementation.</param>
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
=> throw new NotImplementedException();
=> TransformScalar(ref input, ref output);
/// <summary>
/// Scalar 16-point forward identity transform. Each of the sixteen values is multiplied by
/// <c>TwiceNewSqrt2</c> in 64-bit arithmetic and then round-shifted by
/// <c>Av1Forward2dTransformerBase.NewSqrt2BitCount</c> bits.
/// </summary>
/// <param name="input">Reference to the first of sixteen input values.</param>
/// <param name="output">Reference to the first of sixteen output values.</param>
private static void TransformScalar(ref int input, ref int output)
{
    const int length = 16;
    for (int i = 0; i < length; i++)
    {
        // Widen before multiplying so the product cannot overflow 32 bits.
        long scaled = (long)Unsafe.Add(ref input, i) * TwiceNewSqrt2;
        Unsafe.Add(ref output, i) = Av1Math.RoundShift(scaled, Av1Forward2dTransformerBase.NewSqrt2BitCount);
    }
}
}

21
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity32Forward1dTransformer.cs

@ -1,10 +1,29 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
internal class Av1Identity32Forward1dTransformer : IAv1Forward1dTransformer
{
/// <summary>
/// Runs the forward 32-point identity transform. The scalar implementation handles eight
/// values per call, so it is invoked four times at element offsets 0, 8, 16 and 24.
/// </summary>
/// <param name="input">Reference to the first input coefficient.</param>
/// <param name="output">Reference to the first output coefficient.</param>
/// <param name="cosBit">Not forwarded to the scalar implementation.</param>
/// <param name="stageRange">Not forwarded to the scalar implementation.</param>
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
=> throw new NotImplementedException();
{
TransformScalar(ref input, ref output);
TransformScalar(ref Unsafe.Add(ref input, 8), ref Unsafe.Add(ref output, 8));
TransformScalar(ref Unsafe.Add(ref input, 16), ref Unsafe.Add(ref output, 16));
TransformScalar(ref Unsafe.Add(ref input, 24), ref Unsafe.Add(ref output, 24));
}
/// <summary>
/// Scales a group of eight values by four (left shift by two bits), reading from
/// <paramref name="input"/> and writing to <paramref name="output"/>.
/// </summary>
/// <param name="input">Reference to the first of eight input values.</param>
/// <param name="output">Reference to the first of eight output values.</param>
private static void TransformScalar(ref int input, ref int output)
{
    const int groupLength = 8;
    for (int i = 0; i < groupLength; i++)
    {
        Unsafe.Add(ref output, i) = Unsafe.Add(ref input, i) << 2;
    }
}
}

12
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity4Forward1dTransformer.cs

@ -1,10 +1,20 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
internal class Av1Identity4Forward1dTransformer : IAv1Forward1dTransformer
{
/// <summary>
/// Runs the forward 4-point identity transform by delegating to the scalar implementation.
/// </summary>
/// <param name="input">Reference to the first input coefficient.</param>
/// <param name="output">Reference to the first output coefficient.</param>
/// <param name="cosBit">Not forwarded to the scalar implementation.</param>
/// <param name="stageRange">Not forwarded to the scalar implementation.</param>
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
=> throw new NotImplementedException();
=> TransformScalar(ref input, ref output);
/// <summary>
/// Scalar 4-point forward identity transform. Each of the four values is multiplied by
/// <c>Av1Forward2dTransformerBase.NewSqrt2</c> in 64-bit arithmetic and then round-shifted by
/// <c>Av1Forward2dTransformerBase.NewSqrt2BitCount</c> bits.
/// </summary>
/// <param name="input">Reference to the first of four input values.</param>
/// <param name="output">Reference to the first of four output values.</param>
private static void TransformScalar(ref int input, ref int output)
{
    const int length = 4;
    for (int i = 0; i < length; i++)
    {
        // Widen before multiplying so the product cannot overflow 32 bits.
        long scaled = (long)Unsafe.Add(ref input, i) * Av1Forward2dTransformerBase.NewSqrt2;
        Unsafe.Add(ref output, i) = Av1Math.RoundShift(scaled, Av1Forward2dTransformerBase.NewSqrt2BitCount);
    }
}
}

31
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity64Forward1dTransformer.cs

@ -1,10 +1,39 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
internal class Av1Identity64Forward1dTransformer : IAv1Forward1dTransformer
{
private const int QuadNewSqrt2 = 4 * 5793;
/// <summary>
/// Runs the forward 64-point identity transform. The scalar implementation handles sixteen
/// values per call, so it is invoked four times at element offsets 0, 16, 32 and 48.
/// </summary>
/// <param name="input">Reference to the first input coefficient.</param>
/// <param name="output">Reference to the first output coefficient.</param>
/// <param name="cosBit">Not forwarded to the scalar implementation.</param>
/// <param name="stageRange">Not forwarded to the scalar implementation.</param>
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
=> throw new NotImplementedException();
{
TransformScalar(ref input, ref output);
TransformScalar(ref Unsafe.Add(ref input, 16), ref Unsafe.Add(ref output, 16));
TransformScalar(ref Unsafe.Add(ref input, 32), ref Unsafe.Add(ref output, 32));
TransformScalar(ref Unsafe.Add(ref input, 48), ref Unsafe.Add(ref output, 48));
}
/// <summary>
/// Processes one group of sixteen values. Each value is multiplied by <c>QuadNewSqrt2</c> in
/// 64-bit arithmetic and then round-shifted by
/// <c>Av1Forward2dTransformerBase.NewSqrt2BitCount</c> bits.
/// </summary>
/// <param name="input">Reference to the first of sixteen input values.</param>
/// <param name="output">Reference to the first of sixteen output values.</param>
private static void TransformScalar(ref int input, ref int output)
{
    const int groupLength = 16;
    for (int i = 0; i < groupLength; i++)
    {
        // Widen before multiplying so the product cannot overflow 32 bits.
        long scaled = (long)Unsafe.Add(ref input, i) * QuadNewSqrt2;
        Unsafe.Add(ref output, i) = Av1Math.RoundShift(scaled, Av1Forward2dTransformerBase.NewSqrt2BitCount);
    }
}
}

16
src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity8Forward1dTransformer.cs

@ -1,10 +1,24 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward;
internal class Av1Identity8Forward1dTransformer : IAv1Forward1dTransformer
{
/// <summary>
/// Runs the forward 8-point identity transform by delegating to the scalar implementation.
/// </summary>
/// <param name="input">Reference to the first input coefficient.</param>
/// <param name="output">Reference to the first output coefficient.</param>
/// <param name="cosBit">Not forwarded to the scalar implementation.</param>
/// <param name="stageRange">Not forwarded to the scalar implementation.</param>
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
=> throw new NotImplementedException();
=> TransformScalar(ref input, ref output);
/// <summary>
/// Scalar 8-point forward identity transform: doubles each of the eight values
/// (left shift by one bit), reading from <paramref name="input"/> and writing to
/// <paramref name="output"/>.
/// </summary>
/// <param name="input">Reference to the first of eight input values.</param>
/// <param name="output">Reference to the first of eight output values.</param>
private static void TransformScalar(ref int input, ref int output)
{
    const int length = 8;
    for (int i = 0; i < length; i++)
    {
        Unsafe.Add(ref output, i) = Unsafe.Add(ref input, i) << 1;
    }
}
}

148
tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ForwardTransformTests.cs

@ -13,15 +13,15 @@ namespace SixLabors.ImageSharp.Tests.Formats.Heif.Av1;
[Trait("Format", "Avif")]
public class Av1ForwardTransformTests
{
private static readonly double[] MaximumAllowedError =
private static readonly int[] MaximumAllowedError =
[
3, // 4x4 transform
5, // 8x8 transform
11, // 16x16 transform
70, // 32x32 transform
64, // 64x64 transform
3.9, // 4x8 transform
4.3, // 8x4 transform
4, // 4x8 transform
5, // 8x4 transform
12, // 8x16 transform
12, // 16x8 transform
32, // 16x32 transform
@ -36,45 +36,61 @@ public class Av1ForwardTransformTests
36, // 64x16 transform
];
[Theory]
[MemberData(nameof(GetSizes))]
public void AccuracyDct1dTest(int txSize)
{
Random rnd = new(0);
const int testBlockCount = 1; // Originally set to: 1000
Av1TransformSize transformSize = (Av1TransformSize)txSize;
Av1Transform2dFlipConfiguration config = new(Av1TransformType.DctDct, transformSize);
int width = config.TransformSize.GetWidth();
[Fact]
public void AccuracyOfDct1dTransformSize4Test()
=> AssertAccuracy1d(Av1TransformSize.Size4x4, Av1TransformType.DctDct, new Av1Dct4Forward1dTransformer());
int[] inputOfTest = new int[width];
double[] inputReference = new double[width];
int[] outputOfTest = new int[width];
double[] outputReference = new double[width];
for (int ti = 0; ti < testBlockCount; ++ti)
{
// prepare random test data
for (int ni = 0; ni < width; ++ni)
{
inputOfTest[ni] = (short)rnd.Next((1 << 10) - 1);
inputReference[ni] = inputOfTest[ni];
outputReference[ni] = 0;
outputOfTest[ni] = 255;
}
[Fact]
public void AccuracyOfDct1dTransformSize8Test()
=> AssertAccuracy1d(Av1TransformSize.Size8x8, Av1TransformType.DctDct, new Av1Dct8Forward1dTransformer(), 2);
// calculate in forward transform functions
new Av1Dct4Forward1dTransformer().Transform(
ref inputOfTest[0],
ref outputOfTest[0],
config.CosBitColumn,
config.StageRangeColumn);
[Fact]
public void AccuracyOfDct1dTransformSize16Test()
=> AssertAccuracy1d(Av1TransformSize.Size16x16, Av1TransformType.DctDct, new Av1Dct16Forward1dTransformer(), 3);
// calculate in reference forward transform functions
Av1ReferenceTransform.ReferenceDct1d(inputReference, outputReference, width);
[Fact]
public void AccuracyOfDct1dTransformSize32Test()
=> AssertAccuracy1d(Av1TransformSize.Size32x32, Av1TransformType.DctDct, new Av1Dct32Forward1dTransformer(), 4);
// Assert
Assert.True(CompareWithError(outputReference, outputOfTest, 1));
}
}
[Fact]
public void AccuracyOfDct1dTransformSize64Test()
=> AssertAccuracy1d(Av1TransformSize.Size64x64, Av1TransformType.DctDct, new Av1Dct64Forward1dTransformer(), 5);
// ADST 1-D accuracy checks. Size 4 uses the default allowed error of AssertAccuracy1d;
// the larger sizes pass an explicit allowed error of 2, 3 and 4 respectively.
[Fact]
public void AccuracyOfAdst1dTransformSize4Test()
=> AssertAccuracy1d(Av1TransformSize.Size4x4, Av1TransformType.AdstAdst, new Av1Adst4Forward1dTransformer());
[Fact]
public void AccuracyOfAdst1dTransformSize8Test()
=> AssertAccuracy1d(Av1TransformSize.Size8x8, Av1TransformType.AdstAdst, new Av1Adst8Forward1dTransformer(), 2);
[Fact]
public void AccuracyOfAdst1dTransformSize16Test()
=> AssertAccuracy1d(Av1TransformSize.Size16x16, Av1TransformType.AdstAdst, new Av1Adst16Forward1dTransformer(), 3);
[Fact]
public void AccuracyOfAdst1dTransformSize32Test()
=> AssertAccuracy1d(Av1TransformSize.Size32x32, Av1TransformType.AdstAdst, new Av1Adst32Forward1dTransformer(), 4);
// Identity 1-D accuracy checks. All sizes rely on the default allowed error of AssertAccuracy1d.
[Fact]
public void AccuracyOfIdentity1dTransformSize4Test()
=> AssertAccuracy1d(Av1TransformSize.Size4x4, Av1TransformType.Identity, new Av1Identity4Forward1dTransformer());
[Fact]
public void AccuracyOfIdentity1dTransformSize8Test()
=> AssertAccuracy1d(Av1TransformSize.Size8x8, Av1TransformType.Identity, new Av1Identity8Forward1dTransformer());
[Fact]
public void AccuracyOfIdentity1dTransformSize16Test()
=> AssertAccuracy1d(Av1TransformSize.Size16x16, Av1TransformType.Identity, new Av1Identity16Forward1dTransformer());
[Fact]
public void AccuracyOfIdentity1dTransformSize32Test()
=> AssertAccuracy1d(Av1TransformSize.Size32x32, Av1TransformType.Identity, new Av1Identity32Forward1dTransformer());
[Fact]
public void AccuracyOfIdentity1dTransformSize64Test()
=> AssertAccuracy1d(Av1TransformSize.Size64x64, Av1TransformType.Identity, new Av1Identity64Forward1dTransformer());
[Theory]
[MemberData(nameof(GetCombinations))]
@ -212,10 +228,51 @@ public class Av1ForwardTransformTests
}
}
/// <summary>
/// Feeds pseudo-random input (fixed seed) through a 1-D forward transformer and asserts that
/// its output stays within <paramref name="allowedError"/> of the floating point reference
/// transform.
/// </summary>
/// <param name="transformSize">Transform size; its width determines the number of samples.</param>
/// <param name="transformType">Transform type used to build the configuration.</param>
/// <param name="transformerUnderTest">The 1-D transformer implementation being verified.</param>
/// <param name="allowedError">Maximum tolerated difference per coefficient.</param>
private static void AssertAccuracy1d(
    Av1TransformSize transformSize,
    Av1TransformType transformType,
    IAv1Forward1dTransformer transformerUnderTest,
    int allowedError = 1)
{
    const int blockCount = 1; // Originally set to: 1000
    const int exclusiveInputLimit = (1 << 10) - 1;

    Random random = new(0);
    Av1Transform2dFlipConfiguration configuration = new(transformType, transformSize);
    int length = configuration.TransformSize.GetWidth();

    int[] actualInput = new int[length];
    int[] actualOutput = new int[length];
    double[] referenceInput = new double[length];
    double[] referenceOutput = new double[length];

    for (int block = 0; block < blockCount; ++block)
    {
        // Arrange: identical random samples for both implementations, outputs reset to
        // distinct sentinel values.
        for (int i = 0; i < length; ++i)
        {
            int sample = (short)random.Next(exclusiveInputLimit);
            actualInput[i] = sample;
            referenceInput[i] = sample;
            referenceOutput[i] = 0;
            actualOutput[i] = 255;
        }

        // Act: integer implementation under test.
        transformerUnderTest.Transform(
            ref actualInput[0],
            ref actualOutput[0],
            configuration.CosBitColumn,
            configuration.StageRangeColumn);

        // Act: floating point reference implementation.
        Av1ReferenceTransform.ReferenceTransform1d(
            configuration.TransformTypeColumn,
            referenceInput,
            referenceOutput,
            length);

        // Assert
        bool withinTolerance = CompareWithError(referenceOutput, actualOutput, allowedError);
        Assert.True(withinTolerance);
    }
}
private static bool CompareWithError(Span<double> expected, Span<int> actual, double allowedError)
{
// Check that the result is within the allowed accuracy.
double maximumErrorInTest = 0;
double maximumErrorInTest = 0d;
for (int ni = 0; ni < expected.Length; ++ni)
{
maximumErrorInTest = Math.Max(maximumErrorInTest, Math.Abs(actual[ni] - Math.Round(expected[ni])));
@ -224,21 +281,12 @@ public class Av1ForwardTransformTests
return maximumErrorInTest <= allowedError;
}
public static TheoryData<int> GetSizes()
{
TheoryData<int> sizes = [];
// For now test only 4x4.
sizes.Add(0);
return sizes;
}
public static TheoryData<int, int, int> GetCombinations()
{
TheoryData<int, int, int> combinations = [];
for (int s = 0; s < (int)Av1TransformSize.AllSizes; s++)
{
double maxError = MaximumAllowedError[s];
int maxError = MaximumAllowedError[s];
for (int t = 0; t < (int)Av1TransformType.AllTransformTypes; t++)
{
Av1TransformType transformType = (Av1TransformType)t;
@ -246,7 +294,7 @@ public class Av1ForwardTransformTests
Av1Transform2dFlipConfiguration config = new(transformType, transformSize);
if (config.IsAllowed())
{
combinations.Add(s, t, (int)maxError);
combinations.Add(s, t, maxError);
}
// For now only DCT.

4
tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ReferenceTransform.cs

@ -174,7 +174,7 @@ internal class Av1ReferenceTransform
}
}
internal static void ReferenceDct1d(Span<double> input, Span<double> output, int size)
private static void ReferenceDct1d(Span<double> input, Span<double> output, int size)
{
const double kInvSqrt2 = 0.707106781186547524400844362104f;
for (int k = 0; k < size; ++k)
@ -223,7 +223,7 @@ internal class Av1ReferenceTransform
}
}
private static void ReferenceTransform1d(Av1TransformType1d type, Span<double> input, Span<double> output, int size)
internal static void ReferenceTransform1d(Av1TransformType1d type, Span<double> input, Span<double> output, int size)
{
switch (type)
{

Loading…
Cancel
Save