diff --git a/src/ImageSharp/Formats/Heif/Av1/Av1Math.cs b/src/ImageSharp/Formats/Heif/Av1/Av1Math.cs index d5187475d8..ea93204d48 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Av1Math.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Av1Math.cs @@ -1,10 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -using System; -using System.Runtime.InteropServices; -using System.Security.Cryptography; - namespace SixLabors.ImageSharp.Formats.Heif.Av1; internal static class Av1Math @@ -167,10 +163,10 @@ internal static class Av1Math internal static int RoundPowerOf2(int value, int n) => (value + ((1 << n) >> 1)) >> n; internal static int Clamp(int value, int low, int high) - => value < low ? low : (value > high ? high : value); + => Math.Max(low, Math.Min(high, value)); internal static long Clamp(long value, long low, long high) - => value < low ? low : (value > high ? high : value); + => Math.Max(low, Math.Min(high, value)); internal static int DivideLog2Floor(int value, int n) => value >> n; @@ -178,6 +174,9 @@ internal static class Av1Math internal static int DivideLog2Ceiling(int value, int n) => (value + (1 << n) - 1) >> n; + internal static int DivideRound(int value, int bitCount) + => (value + (1 << (bitCount - 1))) >> bitCount; + // Last 3 bits are the value of mod 8. internal static int Modulus8(int value) => value & 0x07; @@ -204,6 +203,4 @@ internal static class Av1Math => endOfBlockExtra |= 1 << n; internal static int AbsoluteDifference(int a, int b) => (a > b) ? 
a - b : b - a; - - internal static int DivideRound(int value, int bitCount) => (value + (1 << (bitCount - 1))) >> bitCount; } diff --git a/src/ImageSharp/Formats/Heif/Av1/Entropy/Av1NzMap.cs b/src/ImageSharp/Formats/Heif/Av1/Entropy/Av1NzMap.cs index d6c951376d..9c15625921 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Entropy/Av1NzMap.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Entropy/Av1NzMap.cs @@ -372,7 +372,7 @@ internal static class Av1NzMap public static int GetNzMapContextFromStats(int stats, Point position, Av1TransformSize transformSize, Av1TransformClass transformClass) { // tx_class == 0(TX_CLASS_2D) - if (position.Y == 0 && ((int)transformClass | position.X) == 0) + if (transformClass == 0 && (position.X == 0) && (position.Y == 0)) { return 0; } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1ForwardTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1ForwardTransformer.cs index 0940425871..6494027cfc 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1ForwardTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1ForwardTransformer.cs @@ -12,7 +12,7 @@ internal class Av1ForwardTransformer private const int NewSqrt = 5793; private const int NewSqrtBitCount = 12; - private static readonly IAv1Forward1dTransformer?[] Transformers = + private static readonly IAv1Transformer1d?[] Transformers = [ new Av1Dct4Forward1dTransformer(), new Av1Dct8Forward1dTransformer(), @@ -36,14 +36,14 @@ internal class Av1ForwardTransformer internal static void Transform2d(Span input, Span coefficients, uint stride, Av1TransformType transformType, Av1TransformSize transformSize, int bitDepth) { Av1Transform2dFlipConfiguration config = new(transformType, transformSize); - IAv1Forward1dTransformer? columnTransformer = GetTransformer(config.TransformFunctionTypeColumn); - IAv1Forward1dTransformer? rowTransformer = GetTransformer(config.TransformFunctionTypeRow); + IAv1Transformer1d? 
columnTransformer = GetTransformer(config.TransformFunctionTypeColumn); + IAv1Transformer1d? rowTransformer = GetTransformer(config.TransformFunctionTypeRow); Transform2d(columnTransformer, rowTransformer, input, coefficients, stride, config, bitDepth); } internal static void Transform2d(TColumn? transformFunctionColumn, TRow? transformFunctionRow, Span input, Span coefficients, uint stride, Av1Transform2dFlipConfiguration config, int bitDepth) - where TColumn : IAv1Forward1dTransformer - where TRow : IAv1Forward1dTransformer + where TColumn : IAv1Transformer1d + where TRow : IAv1Transformer1d { if (transformFunctionColumn != null && transformFunctionRow != null) { @@ -55,15 +55,15 @@ internal class Av1ForwardTransformer } } - private static IAv1Forward1dTransformer? GetTransformer(Av1TransformFunctionType transformerType) + private static IAv1Transformer1d? GetTransformer(Av1TransformFunctionType transformerType) => Transformers[(int)transformerType]; /// /// SVT: av1_tranform_two_d_core_c /// private static void Transform2dCore(TColumn transformFunctionColumn, TRow transformFunctionRow, Span input, uint inputStride, Span output, Av1Transform2dFlipConfiguration config, Span buf, int bitDepth) - where TColumn : IAv1Forward1dTransformer - where TRow : IAv1Forward1dTransformer + where TColumn : IAv1Transformer1d + where TRow : IAv1Transformer1d { int c, r; diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Inverse2dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Inverse2dTransformer.cs index eda267bb97..db0a42610c 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Inverse2dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Inverse2dTransformer.cs @@ -38,8 +38,8 @@ internal class Av1Inverse2dTransformer int cosBitColumn = config.CosBitColumn; int cosBitRow = config.CosBitRow; - IAv1Forward1dTransformer? 
functionColumn = Av1InverseTransformerFactory.GetTransformer(config.TransformFunctionTypeColumn); - IAv1Forward1dTransformer? functionRow = Av1InverseTransformerFactory.GetTransformer(config.TransformFunctionTypeRow); + IAv1Transformer1d? functionColumn = Av1InverseTransformerFactory.GetTransformer(config.TransformFunctionTypeColumn); + IAv1Transformer1d? functionRow = Av1InverseTransformerFactory.GetTransformer(config.TransformFunctionTypeRow); Guard.NotNull(functionColumn); Guard.NotNull(functionRow); @@ -168,8 +168,8 @@ internal class Av1Inverse2dTransformer int cosBitColumn = config.CosBitColumn; int cosBitRow = config.CosBitRow; - IAv1Forward1dTransformer? functionColumn = Av1InverseTransformerFactory.GetTransformer(config.TransformFunctionTypeColumn); - IAv1Forward1dTransformer? functionRow = Av1InverseTransformerFactory.GetTransformer(config.TransformFunctionTypeRow); + IAv1Transformer1d? functionColumn = Av1InverseTransformerFactory.GetTransformer(config.TransformFunctionTypeColumn); + IAv1Transformer1d? functionRow = Av1InverseTransformerFactory.GetTransformer(config.TransformFunctionTypeRow); Guard.NotNull(functionColumn); Guard.NotNull(functionRow); diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1InverseTransformerFactory.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1InverseTransformerFactory.cs index 526ff5250e..83c08625f4 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1InverseTransformerFactory.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1InverseTransformerFactory.cs @@ -31,11 +31,22 @@ internal static class Av1InverseTransformerFactory Av1Inverse2dTransformer.Transform2dAdd(coefficients, readBuffer, readStride, writeBuffer, writeStride, config, buffer, transformFunctionParameters.BitDepth); } - internal static IAv1Forward1dTransformer? GetTransformer(Av1TransformFunctionType type) => type switch + internal static IAv1Transformer1d? 
GetTransformer(Av1TransformFunctionType type) => type switch { Av1TransformFunctionType.Dct4 => new Av1Dct4Inverse1dTransformer(), + Av1TransformFunctionType.Dct8 => new Av1Dct8Inverse1dTransformer(), + Av1TransformFunctionType.Dct16 => new Av1Dct16Inverse1dTransformer(), + Av1TransformFunctionType.Dct32 => new Av1Dct32Inverse1dTransformer(), + Av1TransformFunctionType.Dct64 => new Av1Dct64Inverse1dTransformer(), Av1TransformFunctionType.Adst4 => new Av1Adst4Inverse1dTransformer(), + Av1TransformFunctionType.Adst8 => new Av1Adst8Inverse1dTransformer(), + Av1TransformFunctionType.Adst16 => new Av1Adst16Inverse1dTransformer(), + Av1TransformFunctionType.Adst32 => new Av1Adst32Inverse1dTransformer(), Av1TransformFunctionType.Identity4 => new Av1Identity4Inverse1dTransformer(), + Av1TransformFunctionType.Identity8 => new Av1Identity8Inverse1dTransformer(), + Av1TransformFunctionType.Identity16 => new Av1Identity16Inverse1dTransformer(), + Av1TransformFunctionType.Identity32 => new Av1Identity32Inverse1dTransformer(), + Av1TransformFunctionType.Identity64 => new Av1Identity64Inverse1dTransformer(), _ => null }; } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1SinusConstants.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1SinusConstants.cs index 241730c6b2..994a7637cb 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1SinusConstants.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1SinusConstants.cs @@ -67,7 +67,47 @@ internal static class Av1SinusConstants [0, 21133, 39716, 53510, 60849] ]; + private static readonly int[] Cosinus128Lookup = [ + 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, + 4017, 3996, 3973, 3948, 3920, 3889, 3857, 3822, + 3784, 3745, 3703, 3659, 3612, 3564, 3513, 3461, + 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, + 2896, 2824, 2751, 2675, 2598, 2520, 2440, 2359, + 2276, 2191, 2106, 2019, 1931, 1842, 1751, 1660, + 1567, 1474, 1380, 1285, 1189, 1092, 995, 897, + 799, 700, 601, 501, 401, 301, 201, 101, 0 + ]; + public 
static Span CosinusPi(int n) => CosinusPiArray[n - MinimumCosinusBit]; public static Span SinusPi(int n) => SinusPiArray[n - MinimumCosinusBit]; + + /// + /// Spec: 7.13.2.1 Butterfly functions + /// + public static int Sinus128(int angle) => Cosinus128(angle - 64); + + /// + /// Spec: 7.13.2.1 Butterfly functions + /// + public static int Cosinus128(int angle) + { + int angle2 = angle & 255; + if (angle2 is >= 0 and <= 64) + { + return Cosinus128Lookup[angle2]; + } + + if (angle2 <= 128) + { + return -Cosinus128Lookup[128 - angle2]; + } + + if (angle2 <= 192) + { + return -Cosinus128Lookup[angle2 - 128]; + } + + return Cosinus128Lookup[256 - angle2]; + } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Transform2dFlipConfiguration.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Transform2dFlipConfiguration.cs index b1abf23245..ccbc283c2f 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Transform2dFlipConfiguration.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1Transform2dFlipConfiguration.cs @@ -140,8 +140,8 @@ internal class Av1Transform2dFlipConfiguration this.CosBitRow = CosBitRowMap[txw_idx][txh_idx]; this.TransformFunctionTypeColumn = TransformFunctionTypeMap[txh_idx][(int)this.TransformTypeColumn]; this.TransformFunctionTypeRow = TransformFunctionTypeMap[txw_idx][(int)this.TransformTypeRow]; - this.StageNumberColumn = StageNumberList[(int)this.TransformFunctionTypeColumn]; - this.StageNumberRow = StageNumberList[(int)this.TransformFunctionTypeRow]; + this.StageNumberColumn = this.TransformFunctionTypeColumn != Av1TransformFunctionType.Invalid ? StageNumberList[(int)this.TransformFunctionTypeColumn] : -1; + this.StageNumberRow = this.TransformFunctionTypeRow != Av1TransformFunctionType.Invalid ? 
StageNumberList[(int)this.TransformFunctionTypeRow] : -1; this.StageRangeColumn = new byte[12]; this.StageRangeRow = new byte[12]; this.NonScaleRange(); @@ -303,23 +303,23 @@ internal class Av1Transform2dFlipConfiguration /// private void NonScaleRange() { - Span range_mult2_col = RangeMulti2List[(int)this.TransformFunctionTypeColumn]; if (this.TransformFunctionTypeColumn != Av1TransformFunctionType.Invalid) { + Span range_mult2_col = RangeMulti2List[(int)this.TransformFunctionTypeColumn]; int stage_num_col = this.StageNumberColumn; for (int i = 0; i < stage_num_col; ++i) { this.StageRangeColumn[i] = (byte)((range_mult2_col[i] + 1) >> 1); } - } - if (this.TransformFunctionTypeRow != Av1TransformFunctionType.Invalid) - { - int stage_num_row = this.StageNumberRow; - Span range_mult2_row = RangeMulti2List[(int)this.TransformFunctionTypeRow]; - for (int i = 0; i < stage_num_row; ++i) + if (this.TransformFunctionTypeRow != Av1TransformFunctionType.Invalid) { - this.StageRangeRow[i] = (byte)((range_mult2_col[this.StageNumberColumn - 1] + range_mult2_row[i] + 1) >> 1); + int stage_num_row = this.StageNumberRow; + Span range_mult2_row = RangeMulti2List[(int)this.TransformFunctionTypeRow]; + for (int i = 0; i < stage_num_row; ++i) + { + this.StageRangeRow[i] = (byte)((range_mult2_col[this.StageNumberColumn - 1] + range_mult2_row[i] + 1) >> 1); + } } } } diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1TransformSizeExtensions.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1TransformSizeExtensions.cs index ab4fc7f113..06562e12dd 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Av1TransformSizeExtensions.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Av1TransformSizeExtensions.cs @@ -171,9 +171,9 @@ internal static class Av1TransformSizeExtensions _ => size }; - public static int GetBlockWidthLog2(this Av1TransformSize size) => BlockWidthLog2[(int)GetAdjusted(size)]; + public static int GetBlockWidthLog2(this Av1TransformSize size) => 
BlockWidthLog2[(int)size]; - public static int GetBlockHeightLog2(this Av1TransformSize size) => BlockHeightLog2[(int)GetAdjusted(size)]; + public static int GetBlockHeightLog2(this Av1TransformSize size) => BlockHeightLog2[(int)size]; public static int GetRectangleLogRatio(this Av1TransformSize size) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst16Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst16Forward1dTransformer.cs index 927e333e83..fbd8bb36ad 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst16Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst16Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Adst16Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Adst16Forward1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst32Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst32Forward1dTransformer.cs index ba907e3a04..80a06f6b26 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst32Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst32Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Adst32Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Adst32Forward1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst4Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst4Forward1dTransformer.cs index 38b11dfbfb..f3ab6926be 100644 --- 
a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst4Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst4Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Adst4Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Adst4Forward1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst8Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst8Forward1dTransformer.cs index 701973fc4c..b0aac3656c 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst8Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Adst8Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Adst8Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Adst8Forward1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct16Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct16Forward1dTransformer.cs index ea515aad5d..5b1eb0602d 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct16Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct16Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Dct16Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Dct16Forward1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git 
a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct32Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct32Forward1dTransformer.cs index 2b49035f29..544b5a2f03 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct32Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct32Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Dct32Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Dct32Forward1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct4Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct4Forward1dTransformer.cs index d43e2535c0..a7ed5f0a53 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct4Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct4Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Dct4Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Dct4Forward1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct64Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct64Forward1dTransformer.cs index 57b59cc488..84c1690560 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct64Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct64Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class 
Av1Dct64Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Dct64Forward1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct8Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct8Forward1dTransformer.cs index 923227e57f..8f5fc8926f 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct8Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Dct8Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Dct8Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Dct8Forward1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Forward2dTransformerBase.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Forward2dTransformerBase.cs index dbe93cf8e3..1da6135fef 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Forward2dTransformerBase.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Forward2dTransformerBase.cs @@ -14,8 +14,8 @@ internal abstract class Av1Forward2dTransformerBase /// SVT: av1_tranform_two_d_core_c /// protected static void Transform2dCore(TColumn transformFunctionColumn, TRow transformFunctionRow, Span input, uint inputStride, Span output, Av1Transform2dFlipConfiguration config, Span buf, int bitDepth) - where TColumn : IAv1Forward1dTransformer - where TRow : IAv1Forward1dTransformer + where TColumn : IAv1Transformer1d + where TRow : IAv1Transformer1d { int c, r; diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity16Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity16Forward1dTransformer.cs index 
78ab05d150..7b822c76e6 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity16Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity16Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Identity16Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Identity16Forward1dTransformer : IAv1Transformer1d { private const int TwiceNewSqrt2 = 2 * 5793; diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity32Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity32Forward1dTransformer.cs index 13ee029464..dac3ba0035 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity32Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity32Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Identity32Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Identity32Forward1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity4Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity4Forward1dTransformer.cs index 45a8a78fb1..d1721af140 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity4Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity4Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Identity4Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Identity4Forward1dTransformer : IAv1Transformer1d { public void Transform(Span 
input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity64Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity64Forward1dTransformer.cs index 15a9ae658f..de62fd3d96 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity64Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity64Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Identity64Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Identity64Forward1dTransformer : IAv1Transformer1d { private const int QuadNewSqrt2 = 4 * 5793; diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity8Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity8Forward1dTransformer.cs index 822e7b4ae6..20df9d9061 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity8Forward1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Forward/Av1Identity8Forward1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; -internal class Av1Identity8Forward1dTransformer : IAv1Forward1dTransformer +internal class Av1Identity8Forward1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/IAv1Forward1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/IAv1Transformer1d.cs similarity index 93% rename from src/ImageSharp/Formats/Heif/Av1/Transform/IAv1Forward1dTransformer.cs rename to src/ImageSharp/Formats/Heif/Av1/Transform/IAv1Transformer1d.cs index c77e27acee..c1357734ec 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/IAv1Forward1dTransformer.cs +++ 
b/src/ImageSharp/Formats/Heif/Av1/Transform/IAv1Transformer1d.cs @@ -6,7 +6,7 @@ namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform; /// /// Implementation of a specific forward 1-dimensional transform function. /// -internal interface IAv1Forward1dTransformer +internal interface IAv1Transformer1d { /// /// Execute the 1 dimensional transformation. diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst16Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst16Inverse1dTransformer.cs new file mode 100644 index 0000000000..28a4e88ccb --- /dev/null +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst16Inverse1dTransformer.cs @@ -0,0 +1,219 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using System.Runtime.CompilerServices; + +namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse; + +internal class Av1Adst16Inverse1dTransformer : IAv1Transformer1d +{ + public void Transform(Span input, Span output, int cosBit, Span stageRange) + { + Guard.MustBeSizedAtLeast(input, 16, nameof(input)); + Guard.MustBeSizedAtLeast(output, 16, nameof(output)); + TransformScalar(ref input[0], ref output[0], cosBit, stageRange); + } + + /// + /// SVT: svt_av1_iadst16_new + /// + private static void TransformScalar(ref int input, ref int output, int cosBit, Span stageRange) + { + Span cospi = Av1SinusConstants.CosinusPi(cosBit); + + int stage = 0; + Span stepSpan = stackalloc int[16]; + ref int step = ref stepSpan[0]; + Span bufferSpan = stackalloc int[16]; + ref int buffer = ref bufferSpan[0]; + + // stage 0; + + // stage 1; + stage++; + buffer = Unsafe.Add(ref input, 15); + Unsafe.Add(ref buffer, 1) = input; + Unsafe.Add(ref buffer, 2) = Unsafe.Add(ref input, 13); + Unsafe.Add(ref buffer, 3) = Unsafe.Add(ref input, 2); + Unsafe.Add(ref buffer, 4) = Unsafe.Add(ref input, 11); + Unsafe.Add(ref buffer, 5) = Unsafe.Add(ref input, 4); + Unsafe.Add(ref buffer, 6) = Unsafe.Add(ref input, 
9); + Unsafe.Add(ref buffer, 7) = Unsafe.Add(ref input, 6); + Unsafe.Add(ref buffer, 8) = Unsafe.Add(ref input, 7); + Unsafe.Add(ref buffer, 9) = Unsafe.Add(ref input, 8); + Unsafe.Add(ref buffer, 10) = Unsafe.Add(ref input, 5); + Unsafe.Add(ref buffer, 11) = Unsafe.Add(ref input, 10); + Unsafe.Add(ref buffer, 12) = Unsafe.Add(ref input, 3); + Unsafe.Add(ref buffer, 13) = Unsafe.Add(ref input, 12); + Unsafe.Add(ref buffer, 14) = Unsafe.Add(ref input, 1); + Unsafe.Add(ref buffer, 15) = Unsafe.Add(ref input, 14); + + // range_check_buf(stage, input, bf1, size, stage_range[stage]); + + // stage 2 + stage++; + step = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[2], buffer, cospi[62], Unsafe.Add(ref buffer, 1), cosBit); + Unsafe.Add(ref step, 1) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[62], buffer, -cospi[2], Unsafe.Add(ref buffer, 1), cosBit); + Unsafe.Add(ref step, 2) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[10], Unsafe.Add(ref buffer, 2), cospi[54], Unsafe.Add(ref buffer, 3), cosBit); + Unsafe.Add(ref step, 3) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[54], Unsafe.Add(ref buffer, 2), -cospi[10], Unsafe.Add(ref buffer, 3), cosBit); + Unsafe.Add(ref step, 4) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[18], Unsafe.Add(ref buffer, 4), cospi[46], Unsafe.Add(ref buffer, 5), cosBit); + Unsafe.Add(ref step, 5) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[46], Unsafe.Add(ref buffer, 4), -cospi[18], Unsafe.Add(ref buffer, 5), cosBit); + Unsafe.Add(ref step, 6) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[26], Unsafe.Add(ref buffer, 6), cospi[38], Unsafe.Add(ref buffer, 7), cosBit); + Unsafe.Add(ref step, 7) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[38], Unsafe.Add(ref buffer, 6), -cospi[26], Unsafe.Add(ref buffer, 7), cosBit); + Unsafe.Add(ref step, 8) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[34], Unsafe.Add(ref buffer, 8), cospi[30], Unsafe.Add(ref buffer, 9), cosBit); + Unsafe.Add(ref step, 9) = 
Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[30], Unsafe.Add(ref buffer, 8), -cospi[34], Unsafe.Add(ref buffer, 9), cosBit);
+        Unsafe.Add(ref step, 10) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[42], Unsafe.Add(ref buffer, 10), cospi[22], Unsafe.Add(ref buffer, 11), cosBit); // pair (10, 11) reads buffer[10] and buffer[11], like every other pair in this stage
+        Unsafe.Add(ref step, 11) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[22], Unsafe.Add(ref buffer, 10), -cospi[42], Unsafe.Add(ref buffer, 11), cosBit);
+        Unsafe.Add(ref step, 12) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[50], Unsafe.Add(ref buffer, 12), cospi[14], Unsafe.Add(ref buffer, 13), cosBit);
+        Unsafe.Add(ref step, 13) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[14], Unsafe.Add(ref buffer, 12), -cospi[50], Unsafe.Add(ref buffer, 13), cosBit);
+        Unsafe.Add(ref step, 14) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[58], Unsafe.Add(ref buffer, 14), cospi[6], Unsafe.Add(ref buffer, 15), cosBit);
+        Unsafe.Add(ref step, 15) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[6], Unsafe.Add(ref buffer, 14), -cospi[58], Unsafe.Add(ref buffer, 15), cosBit); // odd step reuses step 14's constants swapped, with cospi[58] negated
+
+        // range_check_buf(stage, input, bf1, size, stage_range[stage]);
+
+        // stage 3
+        stage++;
+        byte range = stageRange[stage]; // handed to ClampValue for every stage-3 sum/difference below
+        buffer = Av1Dct4Inverse1dTransformer.ClampValue(step + Unsafe.Add(ref step, 8), range);
+        Unsafe.Add(ref buffer, 1) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 1) + Unsafe.Add(ref step, 9), range);
+        Unsafe.Add(ref buffer, 2) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 2) + Unsafe.Add(ref step, 10), range);
+        Unsafe.Add(ref buffer, 3) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 3) + Unsafe.Add(ref step, 11), range);
+        Unsafe.Add(ref buffer, 4) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 4) + Unsafe.Add(ref step, 12), range);
+        Unsafe.Add(ref buffer, 5) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 5) + Unsafe.Add(ref step, 13), range);
+        Unsafe.Add(ref buffer, 6) =
Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 6) + Unsafe.Add(ref step, 14), range); + Unsafe.Add(ref buffer, 7) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 7) + Unsafe.Add(ref step, 15), range); + Unsafe.Add(ref buffer, 8) = Av1Dct4Inverse1dTransformer.ClampValue(step - Unsafe.Add(ref step, 8), range); + Unsafe.Add(ref buffer, 9) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 1) - Unsafe.Add(ref step, 9), range); + Unsafe.Add(ref buffer, 10) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 2) - Unsafe.Add(ref step, 10), range); + Unsafe.Add(ref buffer, 11) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 3) - Unsafe.Add(ref step, 11), range); + Unsafe.Add(ref buffer, 12) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 4) - Unsafe.Add(ref step, 12), range); + Unsafe.Add(ref buffer, 13) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 5) - Unsafe.Add(ref step, 13), range); + Unsafe.Add(ref buffer, 14) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 6) - Unsafe.Add(ref step, 14), range); + Unsafe.Add(ref buffer, 15) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 7) - Unsafe.Add(ref step, 15), range); + + // range_check_buf(stage, input, bf1, size, stage_range[stage]); + + // stage 4 + stage++; + step = buffer; + Unsafe.Add(ref step, 1) = Unsafe.Add(ref buffer, 1); + Unsafe.Add(ref step, 2) = Unsafe.Add(ref buffer, 2); + Unsafe.Add(ref step, 3) = Unsafe.Add(ref buffer, 3); + Unsafe.Add(ref step, 4) = Unsafe.Add(ref buffer, 4); + Unsafe.Add(ref step, 5) = Unsafe.Add(ref buffer, 5); + Unsafe.Add(ref step, 6) = Unsafe.Add(ref buffer, 6); + Unsafe.Add(ref step, 7) = Unsafe.Add(ref buffer, 7); + Unsafe.Add(ref step, 8) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[8], Unsafe.Add(ref buffer, 8), cospi[56], Unsafe.Add(ref buffer, 9), cosBit); + Unsafe.Add(ref step, 9) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[56], Unsafe.Add(ref buffer, 
8), -cospi[8], Unsafe.Add(ref buffer, 9), cosBit); + Unsafe.Add(ref step, 10) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[40], Unsafe.Add(ref buffer, 10), cospi[24], Unsafe.Add(ref buffer, 11), cosBit); + Unsafe.Add(ref step, 11) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[24], Unsafe.Add(ref buffer, 10), -cospi[40], Unsafe.Add(ref buffer, 11), cosBit); + Unsafe.Add(ref step, 12) = Av1Dct4Inverse1dTransformer.HalfButterfly(-cospi[56], Unsafe.Add(ref buffer, 12), cospi[8], Unsafe.Add(ref buffer, 13), cosBit); + Unsafe.Add(ref step, 13) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[8], Unsafe.Add(ref buffer, 12), cospi[56], Unsafe.Add(ref buffer, 13), cosBit); + Unsafe.Add(ref step, 14) = Av1Dct4Inverse1dTransformer.HalfButterfly(-cospi[24], Unsafe.Add(ref buffer, 14), cospi[40], Unsafe.Add(ref buffer, 15), cosBit); + Unsafe.Add(ref step, 15) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[40], Unsafe.Add(ref buffer, 14), cospi[24], Unsafe.Add(ref buffer, 15), cosBit); + + // range_check_buf(stage, input, bf1, size, stage_range[stage]); + + // stage 5 + stage++; + range = stageRange[stage]; + buffer = Av1Dct4Inverse1dTransformer.ClampValue(step + Unsafe.Add(ref step, 4), range); + Unsafe.Add(ref buffer, 1) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 1) + Unsafe.Add(ref step, 5), range); + Unsafe.Add(ref buffer, 2) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 2) + Unsafe.Add(ref step, 6), range); + Unsafe.Add(ref buffer, 3) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 3) + Unsafe.Add(ref step, 7), range); + Unsafe.Add(ref buffer, 4) = Av1Dct4Inverse1dTransformer.ClampValue(step - Unsafe.Add(ref step, 4), range); + Unsafe.Add(ref buffer, 5) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 1) - Unsafe.Add(ref step, 5), range); + Unsafe.Add(ref buffer, 6) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 2) - Unsafe.Add(ref step, 6), range); + Unsafe.Add(ref buffer, 7) = 
Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 3) - Unsafe.Add(ref step, 7), range); + Unsafe.Add(ref buffer, 8) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 8) + Unsafe.Add(ref step, 12), range); + Unsafe.Add(ref buffer, 9) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 9) + Unsafe.Add(ref step, 13), range); + Unsafe.Add(ref buffer, 10) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 10) + Unsafe.Add(ref step, 14), range); + Unsafe.Add(ref buffer, 11) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 11) + Unsafe.Add(ref step, 15), range); + Unsafe.Add(ref buffer, 12) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 8) - Unsafe.Add(ref step, 12), range); + Unsafe.Add(ref buffer, 13) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 9) - Unsafe.Add(ref step, 13), range); + Unsafe.Add(ref buffer, 14) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 10) - Unsafe.Add(ref step, 14), range); + Unsafe.Add(ref buffer, 15) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 11) - Unsafe.Add(ref step, 15), range); + + // range_check_buf(stage, input, bf1, size, stage_range[stage]); + + // stage 6 + step = buffer; + Unsafe.Add(ref step, 1) = Unsafe.Add(ref buffer, 1); + Unsafe.Add(ref step, 2) = Unsafe.Add(ref buffer, 2); + Unsafe.Add(ref step, 3) = Unsafe.Add(ref buffer, 3); + Unsafe.Add(ref step, 4) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[16], Unsafe.Add(ref buffer, 4), cospi[48], Unsafe.Add(ref buffer, 5), cosBit); + Unsafe.Add(ref step, 5) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[48], Unsafe.Add(ref buffer, 4), -cospi[16], Unsafe.Add(ref buffer, 5), cosBit); + Unsafe.Add(ref step, 6) = Av1Dct4Inverse1dTransformer.HalfButterfly(-cospi[48], Unsafe.Add(ref buffer, 6), cospi[16], Unsafe.Add(ref buffer, 7), cosBit); + Unsafe.Add(ref step, 7) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[16], Unsafe.Add(ref buffer, 6), cospi[48], 
Unsafe.Add(ref buffer, 7), cosBit); + Unsafe.Add(ref step, 8) = Unsafe.Add(ref buffer, 8); + Unsafe.Add(ref step, 9) = Unsafe.Add(ref buffer, 9); + Unsafe.Add(ref step, 10) = Unsafe.Add(ref buffer, 10); + Unsafe.Add(ref step, 11) = Unsafe.Add(ref buffer, 11); + Unsafe.Add(ref step, 12) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[16], Unsafe.Add(ref buffer, 12), cospi[48], Unsafe.Add(ref buffer, 13), cosBit); + Unsafe.Add(ref step, 13) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[48], Unsafe.Add(ref buffer, 12), -cospi[16], Unsafe.Add(ref buffer, 13), cosBit); + Unsafe.Add(ref step, 14) = Av1Dct4Inverse1dTransformer.HalfButterfly(-cospi[48], Unsafe.Add(ref buffer, 14), cospi[16], Unsafe.Add(ref buffer, 15), cosBit); + Unsafe.Add(ref step, 15) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[16], Unsafe.Add(ref buffer, 14), cospi[48], Unsafe.Add(ref buffer, 15), cosBit); + + // range_check_buf(stage, input, bf1, size, stage_range[stage]); + + // stage 7 + stage++; + range = stageRange[stage]; + buffer = Av1Dct4Inverse1dTransformer.ClampValue(step + Unsafe.Add(ref step, 2), range); + Unsafe.Add(ref buffer, 1) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 1) + Unsafe.Add(ref step, 3), range); + Unsafe.Add(ref buffer, 2) = Av1Dct4Inverse1dTransformer.ClampValue(step - Unsafe.Add(ref step, 2), range); + Unsafe.Add(ref buffer, 3) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 1) - Unsafe.Add(ref step, 3), range); + Unsafe.Add(ref buffer, 4) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 4) + Unsafe.Add(ref step, 6), range); + Unsafe.Add(ref buffer, 5) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 5) + Unsafe.Add(ref step, 7), range); + Unsafe.Add(ref buffer, 6) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 4) - Unsafe.Add(ref step, 6), range); + Unsafe.Add(ref buffer, 7) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 5) - Unsafe.Add(ref step, 7), range); + 
Unsafe.Add(ref buffer, 8) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 8) + Unsafe.Add(ref step, 10), range); + Unsafe.Add(ref buffer, 9) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 9) + Unsafe.Add(ref step, 11), range); + Unsafe.Add(ref buffer, 10) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 8) - Unsafe.Add(ref step, 10), range); + Unsafe.Add(ref buffer, 11) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 9) - Unsafe.Add(ref step, 11), range); + Unsafe.Add(ref buffer, 12) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 12) + Unsafe.Add(ref step, 14), range); + Unsafe.Add(ref buffer, 13) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 13) + Unsafe.Add(ref step, 15), range); + Unsafe.Add(ref buffer, 14) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 12) - Unsafe.Add(ref step, 14), range); + Unsafe.Add(ref buffer, 15) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 13) - Unsafe.Add(ref step, 15), range); + + // range_check_buf(stage, input, bf1, size, stage_range[stage]); + + // stage 8 + step = buffer; + Unsafe.Add(ref step, 1) = Unsafe.Add(ref buffer, 1); + Unsafe.Add(ref step, 2) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 2), cospi[32], Unsafe.Add(ref buffer, 3), cosBit); + Unsafe.Add(ref step, 3) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 2), -cospi[32], Unsafe.Add(ref buffer, 3), cosBit); + Unsafe.Add(ref step, 4) = Unsafe.Add(ref buffer, 4); + Unsafe.Add(ref step, 5) = Unsafe.Add(ref buffer, 5); + Unsafe.Add(ref step, 6) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 6), cospi[32], Unsafe.Add(ref buffer, 7), cosBit); + Unsafe.Add(ref step, 7) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 6), -cospi[32], Unsafe.Add(ref buffer, 7), cosBit); + Unsafe.Add(ref step, 8) = Unsafe.Add(ref buffer, 8); + Unsafe.Add(ref 
step, 9) = Unsafe.Add(ref buffer, 9); + Unsafe.Add(ref step, 10) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 10), cospi[32], Unsafe.Add(ref buffer, 11), cosBit); + Unsafe.Add(ref step, 11) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 10), -cospi[32], Unsafe.Add(ref buffer, 11), cosBit); + Unsafe.Add(ref step, 12) = Unsafe.Add(ref buffer, 12); + Unsafe.Add(ref step, 13) = Unsafe.Add(ref buffer, 13); + Unsafe.Add(ref step, 14) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 14), cospi[32], Unsafe.Add(ref buffer, 15), cosBit); + Unsafe.Add(ref step, 15) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 14), -cospi[32], Unsafe.Add(ref buffer, 15), cosBit); + + // range_check_buf(stage, input, bf1, size, stage_range[stage]); + + // stage 9 + output = step; + Unsafe.Add(ref output, 1) = -Unsafe.Add(ref step, 8); + Unsafe.Add(ref output, 2) = Unsafe.Add(ref step, 12); + Unsafe.Add(ref output, 3) = -Unsafe.Add(ref step, 4); + Unsafe.Add(ref output, 4) = Unsafe.Add(ref step, 6); + Unsafe.Add(ref output, 5) = -Unsafe.Add(ref step, 14); + Unsafe.Add(ref output, 6) = Unsafe.Add(ref step, 10); + Unsafe.Add(ref output, 7) = -Unsafe.Add(ref step, 2); + Unsafe.Add(ref output, 8) = Unsafe.Add(ref step, 3); + Unsafe.Add(ref output, 9) = -Unsafe.Add(ref step, 11); + Unsafe.Add(ref output, 10) = Unsafe.Add(ref step, 15); + Unsafe.Add(ref output, 11) = -Unsafe.Add(ref step, 7); + Unsafe.Add(ref output, 12) = Unsafe.Add(ref step, 5); + Unsafe.Add(ref output, 13) = -Unsafe.Add(ref step, 13); + Unsafe.Add(ref output, 14) = Unsafe.Add(ref step, 9); + Unsafe.Add(ref output, 15) = -Unsafe.Add(ref step, 1); + } +} diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst32Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst32Inverse1dTransformer.cs new file mode 100644 index 0000000000..1faca4b7a5 --- /dev/null +++ 
b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst32Inverse1dTransformer.cs @@ -0,0 +1,431 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System.Drawing;
using System;
using System.Runtime.CompilerServices;

namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse;

// NOTE(review): the reviewed text had lost its generic type arguments. Span<int> follows from the
// int scratch buffers and 'ref int' parameters; Span<byte> for stageRange is presumed - confirm
// against IAv1Transformer1d before merging.

/// <summary>
/// One dimensional inverse ADST over 32 coefficients.
/// </summary>
internal class Av1Adst32Inverse1dTransformer : IAv1Transformer1d
{
    private const int Size = 32;

    /// <summary>
    /// Gets the stage 1 input permutation: entry <c>i</c> holds the input index copied to position <c>i</c>,
    /// stored negated when the coefficient itself has to be negated. Index 0 is never negated.
    /// </summary>
    private static ReadOnlySpan<sbyte> SignedInputOrder =>
    [
        0, -31, -15, 16, -7, 24, 8, -23,
        -3, 28, 12, -19, 4, -27, -11, 20,
        -1, 30, 14, -17, 6, -25, -9, 22,
        2, -29, -13, 18, -5, 26, 10, -21
    ];

    /// <summary>
    /// Validates that both spans hold at least 32 values and runs the scalar transform.
    /// </summary>
    /// <param name="input">The coefficients to transform; they are clamped in place by stage 0.</param>
    /// <param name="output">Receives the 32 transformed values.</param>
    /// <param name="cosBit">Selects the cosine table and is forwarded to every half butterfly.</param>
    /// <param name="stageRange">Clamp range per stage; entries 0 through 11 are read.</param>
    public void Transform(Span<int> input, Span<int> output, int cosBit, Span<byte> stageRange)
    {
        Guard.MustBeSizedAtLeast(input, Size, nameof(input));
        Guard.MustBeSizedAtLeast(output, Size, nameof(output));
        TransformScalar(ref input[0], ref output[0], cosBit, stageRange);
    }

    /// <summary>
    /// SVT: svt_av1_iadst32_new
    /// </summary>
    private static void TransformScalar(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);

        // Two scratch buffers used alternately: even stages write 'even', odd stages write 'odd'.
        Span<int> even = stackalloc int[Size];
        Span<int> odd = stackalloc int[Size];
        int stage = 0;

        // stage 0: clamp the input in place.
        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref input, stageRange[stage]);

        // stage 1: signed permutation of the input.
        ReadOnlySpan<sbyte> order = SignedInputOrder;
        for (int i = 0; i < Size; i++)
        {
            int source = order[i];
            odd[i] = source < 0 ? -Unsafe.Add(ref input, -source) : Unsafe.Add(ref input, source);
        }

        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref odd[0], stageRange[++stage]);

        // stage 2: per group of 4, keep the first pair and rotate the second.
        for (int start = 0; start < Size; start += 4)
        {
            odd.Slice(start, 2).CopyTo(even.Slice(start));
            Rotate(odd, even, start + 2, cospi[32], cospi[32], cosBit);
        }

        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref even[0], stageRange[++stage]);

        // stage 3: sums and differences at distance 2.
        AddSubtract(even, odd, 2);
        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref odd[0], stageRange[++stage]);

        // stage 4: per group of 8, keep the first 4 values and rotate the remaining two pairs.
        for (int start = 0; start < Size; start += 8)
        {
            odd.Slice(start, 4).CopyTo(even.Slice(start));
            Rotate(odd, even, start + 4, cospi[16], cospi[48], cosBit);
            Rotate(odd, even, start + 6, -cospi[48], cospi[16], cosBit);
        }

        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref even[0], stageRange[++stage]);

        // stage 5: sums and differences at distance 4.
        AddSubtract(even, odd, 4);
        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref odd[0], stageRange[++stage]);

        // stage 6: per group of 16, keep the first 8 values and rotate the remaining four pairs.
        for (int start = 0; start < Size; start += 16)
        {
            odd.Slice(start, 8).CopyTo(even.Slice(start));
            Rotate(odd, even, start + 8, cospi[8], cospi[56], cosBit);
            Rotate(odd, even, start + 10, cospi[40], cospi[24], cosBit);
            Rotate(odd, even, start + 12, -cospi[56], cospi[8], cosBit);
            Rotate(odd, even, start + 14, -cospi[24], cospi[40], cosBit);
        }

        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref even[0], stageRange[++stage]);

        // stage 7: sums and differences at distance 8.
        AddSubtract(even, odd, 8);
        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref odd[0], stageRange[++stage]);

        // stage 8: keep the first 16 values and rotate the remaining eight pairs.
        odd.Slice(0, 16).CopyTo(even);
        for (int pair = 0; pair < 4; pair++)
        {
            // Weight indices (4, 60), (20, 44), (36, 28), (52, 12).
            int low = 4 + (16 * pair);
            Rotate(odd, even, 16 + (2 * pair), cospi[low], cospi[64 - low], cosBit);
        }

        for (int pair = 0; pair < 4; pair++)
        {
            // Same index pairs as above, swapped and with the leading weight negated.
            int low = 4 + (16 * pair);
            Rotate(odd, even, 24 + (2 * pair), -cospi[64 - low], cospi[low], cosBit);
        }

        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref even[0], stageRange[++stage]);

        // stage 9: sums and differences at distance 16.
        AddSubtract(even, odd, 16);
        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref odd[0], stageRange[++stage]);

        // stage 10: rotate all sixteen pairs with weight indices (1, 63), (5, 59), ... (61, 3).
        for (int pair = 0; pair < 16; pair++)
        {
            int low = (4 * pair) + 1;
            Rotate(odd, even, 2 * pair, cospi[low], cospi[64 - low], cosBit);
        }

        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref even[0], stageRange[++stage]);

        // stage 11: interleave odd positions ascending (1, 3, ... 31) with even positions descending (30, 28, ... 0).
        for (int pair = 0; pair < 16; pair++)
        {
            Unsafe.Add(ref output, 2 * pair) = even[(2 * pair) + 1];
            Unsafe.Add(ref output, (2 * pair) + 1) = even[30 - (2 * pair)];
        }

        Av1Dct4Inverse1dTransformer.ClampBuffer32(ref output, stageRange[++stage]);
    }

    /// <summary>
    /// Rotates the pair at <paramref name="index"/> and <paramref name="index"/> + 1 of <paramref name="source"/>
    /// into the same positions of <paramref name="target"/>:
    /// the first result weighs the pair with (weight0, weight1), the second with (weight1, -weight0).
    /// </summary>
    private static void Rotate(Span<int> source, Span<int> target, int index, int weight0, int weight1, int cosBit)
    {
        int first = source[index];
        int second = source[index + 1];
        target[index] = Av1Dct4Inverse1dTransformer.HalfButterfly(weight0, first, weight1, second, cosBit);
        target[index + 1] = Av1Dct4Inverse1dTransformer.HalfButterfly(weight1, first, -weight0, second, cosBit);
    }

    /// <summary>
    /// For every group of 2 * <paramref name="half"/> values, writes the sums of the two halves to the
    /// first half of the group in <paramref name="target"/> and their differences to the second half.
    /// </summary>
    private static void AddSubtract(Span<int> source, Span<int> target, int half)
    {
        for (int start = 0; start < Size; start += 2 * half)
        {
            for (int i = start; i < start + half; i++)
            {
                int upper = source[i];
                int lower = source[i + half];
                target[i] = upper + lower;
                target[i + half] = upper - lower;
            }
        }
    }
}
diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst4Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst4Inverse1dTransformer.cs index 3c0fa7d566..fc94ee6b3f 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst4Inverse1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst4Inverse1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse; -internal class Av1Adst4Inverse1dTransformer : IAv1Forward1dTransformer +internal class Av1Adst4Inverse1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git
// a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst8Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst8Inverse1dTransformer.cs
// new file mode 100644
// index 0000000000..104cf979d3
// --- /dev/null
// +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Adst8Inverse1dTransformer.cs
// @@ -0,0 +1,121 @@

// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System.Runtime.CompilerServices;

namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse;

/// <summary>
/// One-dimensional inverse ADST over 8 coefficients.
/// </summary>
internal class Av1Adst8Inverse1dTransformer : IAv1Transformer1d
{
    /// <summary>
    /// Runs the 8-point inverse ADST on <paramref name="input"/> and writes 8 values to <paramref name="output"/>.
    /// </summary>
    /// <param name="input">The coefficients to transform; must hold at least 8 values.</param>
    /// <param name="output">The destination; must hold at least 8 values.</param>
    /// <param name="cosBit">Selects the cosine table and is forwarded to every butterfly.</param>
    /// <param name="stageRange">Clamp bit widths indexed by stage; entries 3 and 5 are read.</param>
    public void Transform(Span<int> input, Span<int> output, int cosBit, Span<byte> stageRange)
    {
        Guard.MustBeSizedAtLeast(input, 8, nameof(input));
        Guard.MustBeSizedAtLeast(output, 8, nameof(output));
        TransformScalar(ref input[0], ref output[0], cosBit, stageRange);
    }

    /// <summary>
    /// SVT: svt_av1_iadst8_new
    /// </summary>
    /// <param name="input">Reference to the first of 8 input coefficients.</param>
    /// <param name="output">Reference to the first of 8 output slots.</param>
    /// <param name="cosBit">Selects the cosine table and is forwarded to every butterfly.</param>
    /// <param name="stageRange">Clamp bit widths indexed by stage; entries 3 and 5 are read.</param>
    private static void TransformScalar(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);

        int stage = 0;
        Span<int> stepSpan = stackalloc int[8];
        ref int step = ref stepSpan[0];
        Span<int> bufferSpan = stackalloc int[8];
        ref int buffer = ref bufferSpan[0];

        // stage 0: nothing to do.

        // stage 1: permute the input. Every input value is copied into the scratch
        // buffer here and the output is only written in stage 7.
        stage++;
        buffer = Unsafe.Add(ref input, 7);
        Unsafe.Add(ref buffer, 1) = input;
        Unsafe.Add(ref buffer, 2) = Unsafe.Add(ref input, 5);
        Unsafe.Add(ref buffer, 3) = Unsafe.Add(ref input, 2);
        Unsafe.Add(ref buffer, 4) = Unsafe.Add(ref input, 3);
        Unsafe.Add(ref buffer, 5) = Unsafe.Add(ref input, 4);
        Unsafe.Add(ref buffer, 6) = Unsafe.Add(ref input, 1);
        Unsafe.Add(ref buffer, 7) = Unsafe.Add(ref input, 6);

        // stage 2: butterflies on adjacent pairs.
        stage++;
        step = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[4], buffer, cospi[60], Unsafe.Add(ref buffer, 1), cosBit);
        Unsafe.Add(ref step, 1) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[60], buffer, -cospi[4], Unsafe.Add(ref buffer, 1), cosBit);
        Unsafe.Add(ref step, 2) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[20], Unsafe.Add(ref buffer, 2), cospi[44], Unsafe.Add(ref buffer, 3), cosBit);
        Unsafe.Add(ref step, 3) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[44], Unsafe.Add(ref buffer, 2), -cospi[20], Unsafe.Add(ref buffer, 3), cosBit);
        Unsafe.Add(ref step, 4) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[36], Unsafe.Add(ref buffer, 4), cospi[28], Unsafe.Add(ref buffer, 5), cosBit);
        Unsafe.Add(ref step, 5) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[28], Unsafe.Add(ref buffer, 4), -cospi[36], Unsafe.Add(ref buffer, 5), cosBit);
        Unsafe.Add(ref step, 6) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[52], Unsafe.Add(ref buffer, 6), cospi[12], Unsafe.Add(ref buffer, 7), cosBit);
        Unsafe.Add(ref step, 7) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[12], Unsafe.Add(ref buffer, 6), -cospi[52], Unsafe.Add(ref buffer, 7), cosBit);

        // stage 3: clamped sums and differences of elements four apart.
        stage++;
        byte range = stageRange[stage];
        buffer = Av1Dct4Inverse1dTransformer.ClampValue(step + Unsafe.Add(ref step, 4), range);
        Unsafe.Add(ref buffer, 1) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 1) + Unsafe.Add(ref step, 5), range);
        Unsafe.Add(ref buffer, 2) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 2) + Unsafe.Add(ref step, 6), range);
        Unsafe.Add(ref buffer, 3) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 3) + Unsafe.Add(ref step, 7), range);
        Unsafe.Add(ref buffer, 4) = Av1Dct4Inverse1dTransformer.ClampValue(step - Unsafe.Add(ref step, 4), range);
        Unsafe.Add(ref buffer, 5) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 1) - Unsafe.Add(ref step, 5), range);
        Unsafe.Add(ref buffer, 6) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 2) - Unsafe.Add(ref step, 6), range);
        Unsafe.Add(ref buffer, 7) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 3) - Unsafe.Add(ref step, 7), range);

        // stage 4: lower half passes through, upper half is rotated.
        stage++;
        step = buffer;
        Unsafe.Add(ref step, 1) = Unsafe.Add(ref buffer, 1);
        Unsafe.Add(ref step, 2) = Unsafe.Add(ref buffer, 2);
        Unsafe.Add(ref step, 3) = Unsafe.Add(ref buffer, 3);
        Unsafe.Add(ref step, 4) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[16], Unsafe.Add(ref buffer, 4), cospi[48], Unsafe.Add(ref buffer, 5), cosBit);
        Unsafe.Add(ref step, 5) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[48], Unsafe.Add(ref buffer, 4), -cospi[16], Unsafe.Add(ref buffer, 5), cosBit);
        Unsafe.Add(ref step, 6) = Av1Dct4Inverse1dTransformer.HalfButterfly(-cospi[48], Unsafe.Add(ref buffer, 6), cospi[16], Unsafe.Add(ref buffer, 7), cosBit);
        Unsafe.Add(ref step, 7) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[16], Unsafe.Add(ref buffer, 6), cospi[48], Unsafe.Add(ref buffer, 7), cosBit);

        // stage 5: clamped sums and differences of elements two apart.
        stage++;
        range = stageRange[stage];
        buffer = Av1Dct4Inverse1dTransformer.ClampValue(step + Unsafe.Add(ref step, 2), range);
        Unsafe.Add(ref buffer, 1) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 1) + Unsafe.Add(ref step, 3), range);
        Unsafe.Add(ref buffer, 2) = Av1Dct4Inverse1dTransformer.ClampValue(step - Unsafe.Add(ref step, 2), range);
        Unsafe.Add(ref buffer, 3) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 1) - Unsafe.Add(ref step, 3), range);
        Unsafe.Add(ref buffer, 4) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 4) + Unsafe.Add(ref step, 6), range);
        Unsafe.Add(ref buffer, 5) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 5) + Unsafe.Add(ref step, 7), range);
        Unsafe.Add(ref buffer, 6) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 4) - Unsafe.Add(ref step, 6), range);
        Unsafe.Add(ref buffer, 7) = Av1Dct4Inverse1dTransformer.ClampValue(Unsafe.Add(ref step, 5) - Unsafe.Add(ref step, 7), range);

        // stage 6: cospi[32] rotations on pairs (2,3) and (6,7). The stage counter is
        // not advanced from here on because no further stageRange entry is read.
        step = buffer;
        Unsafe.Add(ref step, 1) = Unsafe.Add(ref buffer, 1);
        Unsafe.Add(ref step, 2) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 2), cospi[32], Unsafe.Add(ref buffer, 3), cosBit);
        Unsafe.Add(ref step, 3) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 2), -cospi[32], Unsafe.Add(ref buffer, 3), cosBit);
        Unsafe.Add(ref step, 4) = Unsafe.Add(ref buffer, 4);
        Unsafe.Add(ref step, 5) = Unsafe.Add(ref buffer, 5);
        Unsafe.Add(ref step, 6) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 6), cospi[32], Unsafe.Add(ref buffer, 7), cosBit);
        Unsafe.Add(ref step, 7) = Av1Dct4Inverse1dTransformer.HalfButterfly(cospi[32], Unsafe.Add(ref buffer, 6), -cospi[32], Unsafe.Add(ref buffer, 7), cosBit);

        // stage 7: final permutation; odd output positions are negated.
        output = step;
        Unsafe.Add(ref output, 1) = -Unsafe.Add(ref step, 4);
        Unsafe.Add(ref output, 2) = Unsafe.Add(ref step, 6);
        Unsafe.Add(ref output, 3) = -Unsafe.Add(ref step, 2);
        Unsafe.Add(ref output, 4) = Unsafe.Add(ref step, 3);
        Unsafe.Add(ref output, 5) = -Unsafe.Add(ref step, 7);
        Unsafe.Add(ref output, 6) = Unsafe.Add(ref step, 5);
        Unsafe.Add(ref output, 7) = -Unsafe.Add(ref step, 1);
    }
}

// diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct16Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct16Inverse1dTransformer.cs
// new file mode 100644
// index 0000000000..028f32e4ec
// --- /dev/null
// +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct16Inverse1dTransformer.cs
// @@ -0,0 +1,211 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;

namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse;

/// <summary>
/// One-dimensional inverse DCT over 16 coefficients.
/// </summary>
internal class Av1Dct16Inverse1dTransformer : IAv1Transformer1d
{
    /// <summary>
    /// Runs the 16-point inverse DCT on <paramref name="input"/> and writes 16 values to <paramref name="output"/>.
    /// </summary>
    /// <param name="input">The coefficients to transform; must hold at least 16 values.</param>
    /// <param name="output">The destination; must hold at least 16 values.</param>
    /// <param name="cosBit">Selects the cosine table and is the rounding shift of every butterfly.</param>
    /// <param name="stageRange">Clamp bit widths indexed by stage; entries 3 through 7 are read.</param>
    public void Transform(Span<int> input, Span<int> output, int cosBit, Span<byte> stageRange)
    {
        Guard.MustBeSizedAtLeast(input, 16, nameof(input));
        Guard.MustBeSizedAtLeast(output, 16, nameof(output));
        TransformScalar(ref input[0], ref output[0], cosBit, stageRange);
    }

    /// <summary>
    /// SVT: svt_av1_idct16_new
    /// </summary>
    /// <param name="input">Reference to the first of 16 input coefficients.</param>
    /// <param name="output">Reference to the first of 16 output slots.</param>
    /// <param name="cosBit">Selects the cosine table and is the rounding shift of every butterfly.</param>
    /// <param name="stageRange">Clamp bit widths indexed by stage; entries 3 through 7 are read.</param>
    private static void TransformScalar(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        int stage = 0;
        Span<int> temp0 = stackalloc int[16];
        Span<int> temp1 = stackalloc int[16];

        // stage 0: nothing to do.

        // stage 1: permute the input. Every input value is copied into scratch here
        // and the output is only written in stage 7.
        stage++;
        temp0[0] = input;
        temp0[1] = Unsafe.Add(ref input, 8);
        temp0[2] = Unsafe.Add(ref input, 4);
        temp0[3] = Unsafe.Add(ref input, 12);
        temp0[4] = Unsafe.Add(ref input, 2);
        temp0[5] = Unsafe.Add(ref input, 10);
        temp0[6] = Unsafe.Add(ref input, 6);
        temp0[7] = Unsafe.Add(ref input, 14);
        temp0[8] = Unsafe.Add(ref input, 1);
        temp0[9] = Unsafe.Add(ref input, 9);
        temp0[10] = Unsafe.Add(ref input, 5);
        temp0[11] = Unsafe.Add(ref input, 13);
        temp0[12] = Unsafe.Add(ref input, 3);
        temp0[13] = Unsafe.Add(ref input, 11);
        temp0[14] = Unsafe.Add(ref input, 7);
        temp0[15] = Unsafe.Add(ref input, 15);

        // stage 2: elements 0-7 pass through, elements 8-15 are rotated in mirrored pairs.
        stage++;
        temp1[0] = temp0[0];
        temp1[1] = temp0[1];
        temp1[2] = temp0[2];
        temp1[3] = temp0[3];
        temp1[4] = temp0[4];
        temp1[5] = temp0[5];
        temp1[6] = temp0[6];
        temp1[7] = temp0[7];
        temp1[8] = HalfButterfly(cospi[60], temp0[8], -cospi[4], temp0[15], cosBit);
        temp1[9] = HalfButterfly(cospi[28], temp0[9], -cospi[36], temp0[14], cosBit);
        temp1[10] = HalfButterfly(cospi[44], temp0[10], -cospi[20], temp0[13], cosBit);
        temp1[11] = HalfButterfly(cospi[12], temp0[11], -cospi[52], temp0[12], cosBit);
        temp1[12] = HalfButterfly(cospi[52], temp0[11], cospi[12], temp0[12], cosBit);
        temp1[13] = HalfButterfly(cospi[20], temp0[10], cospi[44], temp0[13], cosBit);
        temp1[14] = HalfButterfly(cospi[36], temp0[9], cospi[28], temp0[14], cosBit);
        temp1[15] = HalfButterfly(cospi[4], temp0[8], cospi[60], temp0[15], cosBit);

        // stage 3
        stage++;
        byte range = stageRange[stage];
        temp0[0] = temp1[0];
        temp0[1] = temp1[1];
        temp0[2] = temp1[2];
        temp0[3] = temp1[3];
        temp0[4] = HalfButterfly(cospi[56], temp1[4], -cospi[8], temp1[7], cosBit);
        temp0[5] = HalfButterfly(cospi[24], temp1[5], -cospi[40], temp1[6], cosBit);
        temp0[6] = HalfButterfly(cospi[40], temp1[5], cospi[24], temp1[6], cosBit);
        temp0[7] = HalfButterfly(cospi[8], temp1[4], cospi[56], temp1[7], cosBit);
        temp0[8] = ClampValue(temp1[8] + temp1[9], range);
        temp0[9] = ClampValue(temp1[8] - temp1[9], range);
        temp0[10] = ClampValue(temp1[11] - temp1[10], range);
        temp0[11] = ClampValue(temp1[10] + temp1[11], range);
        temp0[12] = ClampValue(temp1[12] + temp1[13], range);
        temp0[13] = ClampValue(temp1[12] - temp1[13], range);
        temp0[14] = ClampValue(temp1[15] - temp1[14], range);
        temp0[15] = ClampValue(temp1[14] + temp1[15], range);

        // stage 4
        stage++;
        range = stageRange[stage];
        temp1[0] = HalfButterfly(cospi[32], temp0[0], cospi[32], temp0[1], cosBit);
        temp1[1] = HalfButterfly(cospi[32], temp0[0], -cospi[32], temp0[1], cosBit);
        temp1[2] = HalfButterfly(cospi[48], temp0[2], -cospi[16], temp0[3], cosBit);
        temp1[3] = HalfButterfly(cospi[16], temp0[2], cospi[48], temp0[3], cosBit);
        temp1[4] = ClampValue(temp0[4] + temp0[5], range);
        temp1[5] = ClampValue(temp0[4] - temp0[5], range);
        temp1[6] = ClampValue(temp0[7] - temp0[6], range);
        temp1[7] = ClampValue(temp0[6] + temp0[7], range);
        temp1[8] = temp0[8];
        temp1[9] = HalfButterfly(-cospi[16], temp0[9], cospi[48], temp0[14], cosBit);
        temp1[10] = HalfButterfly(-cospi[48], temp0[10], -cospi[16], temp0[13], cosBit);
        temp1[11] = temp0[11];
        temp1[12] = temp0[12];
        temp1[13] = HalfButterfly(-cospi[16], temp0[10], cospi[48], temp0[13], cosBit);
        temp1[14] = HalfButterfly(cospi[48], temp0[9], cospi[16], temp0[14], cosBit);
        temp1[15] = temp0[15];

        // stage 5
        stage++;
        range = stageRange[stage];
        temp0[0] = ClampValue(temp1[0] + temp1[3], range);
        temp0[1] = ClampValue(temp1[1] + temp1[2], range);
        temp0[2] = ClampValue(temp1[1] - temp1[2], range);
        temp0[3] = ClampValue(temp1[0] - temp1[3], range);
        temp0[4] = temp1[4];
        temp0[5] = HalfButterfly(-cospi[32], temp1[5], cospi[32], temp1[6], cosBit);
        temp0[6] = HalfButterfly(cospi[32], temp1[5], cospi[32], temp1[6], cosBit);
        temp0[7] = temp1[7];
        temp0[8] = ClampValue(temp1[8] + temp1[11], range);
        temp0[9] = ClampValue(temp1[9] + temp1[10], range);
        temp0[10] = ClampValue(temp1[9] - temp1[10], range);
        temp0[11] = ClampValue(temp1[8] - temp1[11], range);
        temp0[12] = ClampValue(temp1[15] - temp1[12], range);
        temp0[13] = ClampValue(temp1[14] - temp1[13], range);
        temp0[14] = ClampValue(temp1[13] + temp1[14], range);
        temp0[15] = ClampValue(temp1[12] + temp1[15], range);

        // stage 6
        stage++;
        range = stageRange[stage];
        temp1[0] = ClampValue(temp0[0] + temp0[7], range);
        temp1[1] = ClampValue(temp0[1] + temp0[6], range);
        temp1[2] = ClampValue(temp0[2] + temp0[5], range);
        temp1[3] = ClampValue(temp0[3] + temp0[4], range);
        temp1[4] = ClampValue(temp0[3] - temp0[4], range);
        temp1[5] = ClampValue(temp0[2] - temp0[5], range);
        temp1[6] = ClampValue(temp0[1] - temp0[6], range);
        temp1[7] = ClampValue(temp0[0] - temp0[7], range);
        temp1[8] = temp0[8];
        temp1[9] = temp0[9];
        temp1[10] = HalfButterfly(-cospi[32], temp0[10], cospi[32], temp0[13], cosBit);
        temp1[11] = HalfButterfly(-cospi[32], temp0[11], cospi[32], temp0[12], cosBit);
        temp1[12] = HalfButterfly(cospi[32], temp0[11], cospi[32], temp0[12], cosBit);
        temp1[13] = HalfButterfly(cospi[32], temp0[10], cospi[32], temp0[13], cosBit);
        temp1[14] = temp0[14];
        temp1[15] = temp0[15];

        // stage 7: mirrored sums fill output 0-7, mirrored differences fill output 8-15.
        stage++;
        range = stageRange[stage];
        Unsafe.Add(ref output, 0) = ClampValue(temp1[0] + temp1[15], range);
        Unsafe.Add(ref output, 1) = ClampValue(temp1[1] + temp1[14], range);
        Unsafe.Add(ref output, 2) = ClampValue(temp1[2] + temp1[13], range);
        Unsafe.Add(ref output, 3) = ClampValue(temp1[3] + temp1[12], range);
        Unsafe.Add(ref output, 4) = ClampValue(temp1[4] + temp1[11], range);
        Unsafe.Add(ref output, 5) = ClampValue(temp1[5] + temp1[10], range);
        Unsafe.Add(ref output, 6) = ClampValue(temp1[6] + temp1[9], range);
        Unsafe.Add(ref output, 7) = ClampValue(temp1[7] + temp1[8], range);
        Unsafe.Add(ref output, 8) = ClampValue(temp1[7] - temp1[8], range);
        Unsafe.Add(ref output, 9) = ClampValue(temp1[6] - temp1[9], range);
        Unsafe.Add(ref output, 10) = ClampValue(temp1[5] - temp1[10], range);
        Unsafe.Add(ref output, 11) = ClampValue(temp1[4] - temp1[11], range);
        Unsafe.Add(ref output, 12) = ClampValue(temp1[3] - temp1[12], range);
        Unsafe.Add(ref output, 13) = ClampValue(temp1[2] - temp1[13], range);
        Unsafe.Add(ref output, 14) = ClampValue(temp1[1] - temp1[14], range);
        Unsafe.Add(ref output, 15) = ClampValue(temp1[0] - temp1[15], range);
    }

    /// <summary>
    /// Clamps <paramref name="value"/> to the range of a signed integer that is <paramref name="bit"/> bits wide,
    /// i.e. [-(2^(bit-1)), 2^(bit-1) - 1].
    /// </summary>
    /// <param name="value">The value to clamp.</param>
    /// <param name="bit">The signed bit width; 0 disables clamping.</param>
    /// <returns>The clamped value, or <paramref name="value"/> unchanged when <paramref name="bit"/> is 0.</returns>
    internal static int ClampValue(int value, byte bit)
    {
        // A byte cannot be negative, so zero is the only invalid width.
        if (bit == 0)
        {
            return value; // Do nothing for invalid clamp bit.
        }

        // Computed as long so that widths of 32 bits and above do not overflow the shift.
        long maxValue = (1L << (bit - 1)) - 1;
        long minValue = -(1L << (bit - 1));
        return (int)Av1Math.Clamp(value, minValue, maxValue);
    }

    /// <summary>
    /// Computes the rounded butterfly term (w0 * in0 + w1 * in1 + 2^(bit-1)) >> bit.
    /// </summary>
    /// <param name="w0">Weight applied to <paramref name="in0"/>.</param>
    /// <param name="in0">First input value.</param>
    /// <param name="w1">Weight applied to <paramref name="in1"/>.</param>
    /// <param name="in1">Second input value.</param>
    /// <param name="bit">The right shift applied after rounding; expected to be at least 1.</param>
    /// <returns>The shifted sum, truncated to 32 bits.</returns>
    internal static int HalfButterfly(int w0, int in0, int w1, int in1, int bit)
    {
        // Widen before multiplying: '(long)(w0 * in0)' would multiply in 32 bits and
        // wrap on overflow before the widening cast gets a chance to help.
        long result64 = ((long)w0 * in0) + ((long)w1 * in1);
        long intermediate = result64 + (1L << (bit - 1));

        // NOTE(david.barker): The value 'result_64' may not necessarily fit
        // into 32 bits. However, the result of this function is nominally
        // ROUND_POWER_OF_TWO_64(result_64, bit)
        // and that is required to fit into stage_range[stage] many bits
        // (checked by range_check_buf()).
        //
        // Here we've unpacked that rounding operation, and it can be shown
        // that the value of 'intermediate' here *does* fit into 32 bits
        // for any conformant bitstream.
        // The upshot is that, if you do all this calculation using
        // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic,
        // then you'll still get the correct result.
        return (int)(intermediate >> bit);
    }
}

// diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct32Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct32Inverse1dTransformer.cs
// new file mode 100644
// index 0000000000..9107e9eacb
// --- /dev/null
// +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct32Inverse1dTransformer.cs
// @@ -0,0 +1,399 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
+ +using System.Runtime.CompilerServices; + +namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse; + +internal class Av1Dct32Inverse1dTransformer : IAv1Transformer1d +{ + public void Transform(Span input, Span output, int cosBit, Span stageRange) + { + Guard.MustBeSizedAtLeast(input, 32, nameof(input)); + Guard.MustBeSizedAtLeast(output, 32, nameof(output)); + TransformScalar(ref input[0], ref output[0], cosBit, stageRange); + } + + /// + /// SVT: svt_av1_idct32_new + /// + private static void TransformScalar(ref int input, ref int output, int cosBit, Span stageRange) + { + Span cospi = Av1SinusConstants.CosinusPi(cosBit); + int stage = 0; + Span temp0 = stackalloc int[32]; + Span temp1 = stackalloc int[32]; + + // stage 0; + + // stage 1; + stage++; + temp1[0] = Unsafe.Add(ref input, 0); + temp1[1] = Unsafe.Add(ref input, 16); + temp1[2] = Unsafe.Add(ref input, 8); + temp1[3] = Unsafe.Add(ref input, 24); + temp1[4] = Unsafe.Add(ref input, 4); + temp1[5] = Unsafe.Add(ref input, 20); + temp1[6] = Unsafe.Add(ref input, 12); + temp1[7] = Unsafe.Add(ref input, 28); + temp1[8] = Unsafe.Add(ref input, 2); + temp1[9] = Unsafe.Add(ref input, 18); + temp1[10] = Unsafe.Add(ref input, 10); + temp1[11] = Unsafe.Add(ref input, 26); + temp1[12] = Unsafe.Add(ref input, 6); + temp1[13] = Unsafe.Add(ref input, 22); + temp1[14] = Unsafe.Add(ref input, 14); + temp1[15] = Unsafe.Add(ref input, 30); + temp1[16] = Unsafe.Add(ref input, 1); + temp1[17] = Unsafe.Add(ref input, 17); + temp1[18] = Unsafe.Add(ref input, 9); + temp1[19] = Unsafe.Add(ref input, 25); + temp1[20] = Unsafe.Add(ref input, 5); + temp1[21] = Unsafe.Add(ref input, 21); + temp1[22] = Unsafe.Add(ref input, 13); + temp1[23] = Unsafe.Add(ref input, 29); + temp1[24] = Unsafe.Add(ref input, 3); + temp1[25] = Unsafe.Add(ref input, 19); + temp1[26] = Unsafe.Add(ref input, 11); + temp1[27] = Unsafe.Add(ref input, 27); + temp1[28] = Unsafe.Add(ref input, 7); + temp1[29] = Unsafe.Add(ref input, 23); + temp1[30] = 
Unsafe.Add(ref input, 15); + temp1[31] = Unsafe.Add(ref input, 31); + + // range_check_buf(stage, input, bf1, size, range); + + // stage 2 + stage++; + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + temp0[2] = temp1[2]; + temp0[3] = temp1[3]; + temp0[4] = temp1[4]; + temp0[5] = temp1[5]; + temp0[6] = temp1[6]; + temp0[7] = temp1[7]; + temp0[8] = temp1[8]; + temp0[9] = temp1[9]; + temp0[10] = temp1[10]; + temp0[11] = temp1[11]; + temp0[12] = temp1[12]; + temp0[13] = temp1[13]; + temp0[14] = temp1[14]; + temp0[15] = temp1[15]; + temp0[16] = HalfButterfly(cospi[62], temp1[16], -cospi[2], temp1[31], cosBit); + temp0[17] = HalfButterfly(cospi[30], temp1[17], -cospi[34], temp1[30], cosBit); + temp0[18] = HalfButterfly(cospi[46], temp1[18], -cospi[18], temp1[29], cosBit); + temp0[19] = HalfButterfly(cospi[14], temp1[19], -cospi[50], temp1[28], cosBit); + temp0[20] = HalfButterfly(cospi[54], temp1[20], -cospi[10], temp1[27], cosBit); + temp0[21] = HalfButterfly(cospi[22], temp1[21], -cospi[42], temp1[26], cosBit); + temp0[22] = HalfButterfly(cospi[38], temp1[22], -cospi[26], temp1[25], cosBit); + temp0[23] = HalfButterfly(cospi[6], temp1[23], -cospi[58], temp1[24], cosBit); + temp0[24] = HalfButterfly(cospi[58], temp1[23], cospi[6], temp1[24], cosBit); + temp0[25] = HalfButterfly(cospi[26], temp1[22], cospi[38], temp1[25], cosBit); + temp0[26] = HalfButterfly(cospi[42], temp1[21], cospi[22], temp1[26], cosBit); + temp0[27] = HalfButterfly(cospi[10], temp1[20], cospi[54], temp1[27], cosBit); + temp0[28] = HalfButterfly(cospi[50], temp1[19], cospi[14], temp1[28], cosBit); + temp0[29] = HalfButterfly(cospi[18], temp1[18], cospi[46], temp1[29], cosBit); + temp0[30] = HalfButterfly(cospi[34], temp1[17], cospi[30], temp1[30], cosBit); + temp0[31] = HalfButterfly(cospi[2], temp1[16], cospi[62], temp1[31], cosBit); + + // range_check_buf(stage, input, bf1, size, range); + + // stage 3 + stage++; + byte range = stageRange[stage]; + temp1[0] = temp0[0]; + temp1[1] = temp0[1]; + 
temp1[2] = temp0[2]; + temp1[3] = temp0[3]; + temp1[4] = temp0[4]; + temp1[5] = temp0[5]; + temp1[6] = temp0[6]; + temp1[7] = temp0[7]; + temp1[8] = HalfButterfly(cospi[60], temp0[8], -cospi[4], temp0[15], cosBit); + temp1[9] = HalfButterfly(cospi[28], temp0[9], -cospi[36], temp0[14], cosBit); + temp1[10] = HalfButterfly(cospi[44], temp0[10], -cospi[20], temp0[13], cosBit); + temp1[11] = HalfButterfly(cospi[12], temp0[11], -cospi[52], temp0[12], cosBit); + temp1[12] = HalfButterfly(cospi[52], temp0[11], cospi[12], temp0[12], cosBit); + temp1[13] = HalfButterfly(cospi[20], temp0[10], cospi[44], temp0[13], cosBit); + temp1[14] = HalfButterfly(cospi[36], temp0[9], cospi[28], temp0[14], cosBit); + temp1[15] = HalfButterfly(cospi[4], temp0[8], cospi[60], temp0[15], cosBit); + temp1[16] = ClampValue(temp0[16] + temp0[17], range); + temp1[17] = ClampValue(temp0[16] - temp0[17], range); + temp1[18] = ClampValue(-temp0[18] + temp0[19], range); + temp1[19] = ClampValue(temp0[18] + temp0[19], range); + temp1[20] = ClampValue(temp0[20] + temp0[21], range); + temp1[21] = ClampValue(temp0[20] - temp0[21], range); + temp1[22] = ClampValue(-temp0[22] + temp0[23], range); + temp1[23] = ClampValue(temp0[22] + temp0[23], range); + temp1[24] = ClampValue(temp0[24] + temp0[25], range); + temp1[25] = ClampValue(temp0[24] - temp0[25], range); + temp1[26] = ClampValue(-temp0[26] + temp0[27], range); + temp1[27] = ClampValue(temp0[26] + temp0[27], range); + temp1[28] = ClampValue(temp0[28] + temp0[29], range); + temp1[29] = ClampValue(temp0[28] - temp0[29], range); + temp1[30] = ClampValue(-temp0[30] + temp0[31], range); + temp1[31] = ClampValue(temp0[30] + temp0[31], range); + + // range_check_buf(stage, input, bf1, size, range); + + // stage 4 + stage++; + range = stageRange[stage]; + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + temp0[2] = temp1[2]; + temp0[3] = temp1[3]; + temp0[4] = HalfButterfly(cospi[56], temp1[4], -cospi[8], temp1[7], cosBit); + temp0[5] = HalfButterfly(cospi[24], 
temp1[5], -cospi[40], temp1[6], cosBit); + temp0[6] = HalfButterfly(cospi[40], temp1[5], cospi[24], temp1[6], cosBit); + temp0[7] = HalfButterfly(cospi[8], temp1[4], cospi[56], temp0[7], cosBit); + temp0[8] = ClampValue(temp1[8] + temp1[9], range); + temp0[9] = ClampValue(temp1[8] - temp1[9], range); + temp0[10] = ClampValue(-temp1[10] + temp1[11], range); + temp0[11] = ClampValue(temp1[10] + temp1[11], range); + temp0[12] = ClampValue(temp1[12] + temp1[13], range); + temp0[13] = ClampValue(temp1[12] - temp1[13], range); + temp0[14] = ClampValue(-temp1[14] + temp1[15], range); + temp0[15] = ClampValue(temp1[14] + temp1[15], range); + temp0[16] = temp1[16]; + temp0[17] = HalfButterfly(-cospi[8], temp1[17], cospi[56], temp1[30], cosBit); + temp0[18] = HalfButterfly(-cospi[56], temp1[18], -cospi[8], temp1[29], cosBit); + temp0[19] = temp1[19]; + temp0[20] = temp1[20]; + temp0[21] = HalfButterfly(-cospi[40], temp1[21], cospi[24], temp1[26], cosBit); + temp0[22] = HalfButterfly(-cospi[24], temp1[22], -cospi[40], temp1[25], cosBit); + temp0[23] = temp1[23]; + temp0[24] = temp1[24]; + temp0[25] = HalfButterfly(-cospi[40], temp1[22], cospi[24], temp1[25], cosBit); + temp0[26] = HalfButterfly(cospi[24], temp1[21], cospi[40], temp1[26], cosBit); + temp0[27] = temp1[27]; + temp0[28] = temp1[28]; + temp0[29] = HalfButterfly(-cospi[8], temp1[18], cospi[56], temp1[29], cosBit); + temp0[30] = HalfButterfly(cospi[56], temp1[17], cospi[8], temp1[30], cosBit); + temp0[31] = temp1[31]; + + // range_check_buf(stage, input, bf1, size, range); + + // stage 5 + stage++; + range = stageRange[stage]; + temp1[0] = HalfButterfly(cospi[32], temp0[0], cospi[32], temp0[1], cosBit); + temp1[1] = HalfButterfly(cospi[32], temp0[0], -cospi[32], temp0[1], cosBit); + temp1[2] = HalfButterfly(cospi[48], temp0[2], -cospi[16], temp0[3], cosBit); + temp1[3] = HalfButterfly(cospi[16], temp0[2], cospi[48], temp0[3], cosBit); + temp1[4] = ClampValue(temp0[4] + temp0[5], range); + temp1[5] = 
ClampValue(temp0[4] - temp0[5], range); + temp1[6] = ClampValue(-temp0[6] + temp0[7], range); + temp1[7] = ClampValue(temp0[6] + temp0[7], range); + temp1[8] = temp0[8]; + temp1[9] = HalfButterfly(-cospi[16], temp0[9], cospi[48], temp0[14], cosBit); + temp1[10] = HalfButterfly(-cospi[48], temp0[10], -cospi[16], temp0[13], cosBit); + temp1[11] = temp0[11]; + temp1[12] = temp0[12]; + temp1[13] = HalfButterfly(-cospi[16], temp0[10], cospi[48], temp0[13], cosBit); + temp1[14] = HalfButterfly(cospi[48], temp0[9], cospi[16], temp0[14], cosBit); + temp1[15] = temp0[15]; + temp1[16] = ClampValue(temp0[16] + temp0[19], range); + temp1[17] = ClampValue(temp0[17] + temp0[18], range); + temp1[18] = ClampValue(temp0[17] - temp0[18], range); + temp1[19] = ClampValue(temp0[16] - temp0[19], range); + temp1[20] = ClampValue(-temp0[20] + temp0[23], range); + temp1[21] = ClampValue(-temp0[21] + temp0[22], range); + temp1[22] = ClampValue(temp0[21] + temp0[22], range); + temp1[23] = ClampValue(temp0[20] + temp0[23], range); + temp1[24] = ClampValue(temp0[24] + temp0[27], range); + temp1[25] = ClampValue(temp0[25] + temp0[26], range); + temp1[26] = ClampValue(temp0[25] - temp0[26], range); + temp1[27] = ClampValue(temp0[24] - temp0[27], range); + temp1[28] = ClampValue(-temp0[28] + temp0[31], range); + temp1[29] = ClampValue(-temp0[29] + temp0[30], range); + temp1[30] = ClampValue(temp0[29] + temp0[30], range); + temp1[31] = ClampValue(temp0[28] + temp0[31], range); + + // range_check_buf(stage, input, bf1, size, range); + + // stage 6 + stage++; + range = stageRange[stage]; + temp0[0] = ClampValue(temp1[0] + temp1[3], range); + temp0[1] = ClampValue(temp1[1] + temp1[2], range); + temp0[2] = ClampValue(temp1[1] - temp1[2], range); + temp0[3] = ClampValue(temp1[0] - temp1[3], range); + temp0[4] = temp1[4]; + temp0[5] = HalfButterfly(-cospi[32], temp1[5], cospi[32], temp1[6], cosBit); + temp0[6] = HalfButterfly(cospi[32], temp1[5], cospi[32], temp1[6], cosBit); + temp0[7] = temp1[7]; + 
temp0[8] = ClampValue(temp1[8] + temp1[11], range); + temp0[9] = ClampValue(temp1[9] + temp1[10], range); + temp0[10] = ClampValue(temp1[9] - temp1[10], range); + temp0[11] = ClampValue(temp1[8] - temp1[11], range); + temp0[12] = ClampValue(-temp1[12] + temp1[15], range); + temp0[13] = ClampValue(-temp1[13] + temp1[14], range); + temp0[14] = ClampValue(temp1[13] + temp1[14], range); + temp0[15] = ClampValue(temp1[12] + temp1[15], range); + temp0[16] = temp1[16]; + temp0[17] = temp1[17]; + temp0[18] = HalfButterfly(-cospi[16], temp1[18], cospi[48], temp1[29], cosBit); + temp0[19] = HalfButterfly(-cospi[16], temp1[19], cospi[48], temp1[28], cosBit); + temp0[20] = HalfButterfly(-cospi[48], temp1[20], -cospi[16], temp1[27], cosBit); + temp0[21] = HalfButterfly(-cospi[48], temp1[21], -cospi[16], temp1[26], cosBit); + temp0[22] = temp1[22]; + temp0[23] = temp1[23]; + temp0[24] = temp1[24]; + temp0[25] = temp1[25]; + temp0[26] = HalfButterfly(-cospi[16], temp1[21], cospi[48], temp1[26], cosBit); + temp0[27] = HalfButterfly(-cospi[16], temp1[20], cospi[48], temp1[27], cosBit); + temp0[28] = HalfButterfly(cospi[48], temp1[19], cospi[16], temp1[28], cosBit); + temp0[29] = HalfButterfly(cospi[48], temp1[18], cospi[16], temp1[29], cosBit); + temp0[30] = temp1[30]; + temp0[31] = temp1[31]; + + // range_check_buf(stage, input, bf1, size, range); + + // stage 7 + stage++; + range = stageRange[stage]; + temp1[0] = ClampValue(temp0[0] + temp0[7], range); + temp1[1] = ClampValue(temp0[1] + temp0[6], range); + temp1[2] = ClampValue(temp0[2] + temp0[5], range); + temp1[3] = ClampValue(temp0[3] + temp0[4], range); + temp1[4] = ClampValue(temp0[3] - temp0[4], range); + temp1[5] = ClampValue(temp0[2] - temp0[5], range); + temp1[6] = ClampValue(temp0[1] - temp0[6], range); + temp1[7] = ClampValue(temp0[0] - temp0[7], range); + temp1[8] = temp0[8]; + temp1[9] = temp0[9]; + temp1[10] = HalfButterfly(-cospi[32], temp0[10], cospi[32], temp0[13], cosBit); + temp1[11] = 
HalfButterfly(-cospi[32], temp0[11], cospi[32], temp0[12], cosBit); + temp1[12] = HalfButterfly(cospi[32], temp0[11], cospi[32], temp0[12], cosBit); + temp1[13] = HalfButterfly(cospi[32], temp0[10], cospi[32], temp0[13], cosBit); + temp1[14] = temp0[14]; + temp1[15] = temp0[15]; + temp1[16] = ClampValue(temp0[16] + temp0[23], range); + temp1[17] = ClampValue(temp0[17] + temp0[22], range); + temp1[18] = ClampValue(temp0[18] + temp0[21], range); + temp1[19] = ClampValue(temp0[19] + temp0[20], range); + temp1[20] = ClampValue(temp0[19] - temp0[20], range); + temp1[21] = ClampValue(temp0[18] - temp0[21], range); + temp1[22] = ClampValue(temp0[17] - temp0[22], range); + temp1[23] = ClampValue(temp0[16] - temp0[23], range); + temp1[24] = ClampValue(-temp0[24] + temp0[31], range); + temp1[25] = ClampValue(-temp0[25] + temp0[30], range); + temp1[26] = ClampValue(-temp0[26] + temp0[29], range); + temp1[27] = ClampValue(-temp0[27] + temp0[28], range); + temp1[28] = ClampValue(temp0[27] + temp0[28], range); + temp1[29] = ClampValue(temp0[26] + temp0[29], range); + temp1[30] = ClampValue(temp0[25] + temp0[30], range); + temp1[31] = ClampValue(temp0[24] + temp0[31], range); + + // range_check_buf(stage, input, bf1, size, range); + + // stage 8 + stage++; + range = stageRange[stage]; + temp0[0] = ClampValue(temp1[0] + temp1[15], range); + temp0[1] = ClampValue(temp1[1] + temp1[14], range); + temp0[2] = ClampValue(temp1[2] + temp1[13], range); + temp0[3] = ClampValue(temp1[3] + temp1[12], range); + temp0[4] = ClampValue(temp1[4] + temp1[11], range); + temp0[5] = ClampValue(temp1[5] + temp1[10], range); + temp0[6] = ClampValue(temp1[6] + temp1[9], range); + temp0[7] = ClampValue(temp1[7] + temp1[8], range); + temp0[8] = ClampValue(temp1[7] - temp1[8], range); + temp0[9] = ClampValue(temp1[6] - temp1[9], range); + temp0[10] = ClampValue(temp1[5] - temp1[10], range); + temp0[11] = ClampValue(temp1[4] - temp1[11], range); + temp0[12] = ClampValue(temp1[3] - temp1[12], range); + 
temp0[13] = ClampValue(temp1[2] - temp1[13], range); + temp0[14] = ClampValue(temp1[1] - temp1[14], range); + temp0[15] = ClampValue(temp1[0] - temp1[15], range); + temp0[16] = temp1[16]; + temp0[17] = temp1[17]; + temp0[18] = temp1[18]; + temp0[19] = temp1[19]; + temp0[20] = HalfButterfly(-cospi[32], temp1[20], cospi[32], temp1[27], cosBit); + temp0[21] = HalfButterfly(-cospi[32], temp1[21], cospi[32], temp1[26], cosBit); + temp0[22] = HalfButterfly(-cospi[32], temp1[22], cospi[32], temp1[25], cosBit); + temp0[23] = HalfButterfly(-cospi[32], temp1[23], cospi[32], temp1[24], cosBit); + temp0[24] = HalfButterfly(cospi[32], temp1[23], cospi[32], temp1[24], cosBit); + temp0[25] = HalfButterfly(cospi[32], temp1[22], cospi[32], temp1[25], cosBit); + temp0[26] = HalfButterfly(cospi[32], temp1[21], cospi[32], temp1[26], cosBit); + temp0[27] = HalfButterfly(cospi[32], temp1[20], cospi[32], temp1[27], cosBit); + temp0[28] = temp1[28]; + temp0[29] = temp1[29]; + temp0[30] = temp1[30]; + temp0[31] = temp1[31]; + + // range_check_buf(stage, input, bf1, size, range); + + // stage 9 + stage++; + range = stageRange[stage]; + Unsafe.Add(ref output, 0) = ClampValue(temp0[0] + temp0[31], range); + Unsafe.Add(ref output, 1) = ClampValue(temp0[1] + temp0[30], range); + Unsafe.Add(ref output, 2) = ClampValue(temp0[2] + temp0[29], range); + Unsafe.Add(ref output, 3) = ClampValue(temp0[3] + temp0[28], range); + Unsafe.Add(ref output, 4) = ClampValue(temp0[4] + temp0[27], range); + Unsafe.Add(ref output, 5) = ClampValue(temp0[5] + temp0[26], range); + Unsafe.Add(ref output, 6) = ClampValue(temp0[6] + temp0[25], range); + Unsafe.Add(ref output, 7) = ClampValue(temp0[7] + temp0[24], range); + Unsafe.Add(ref output, 8) = ClampValue(temp0[8] + temp0[23], range); + Unsafe.Add(ref output, 9) = ClampValue(temp0[9] + temp0[22], range); + Unsafe.Add(ref output, 10) = ClampValue(temp0[10] + temp0[21], range); + Unsafe.Add(ref output, 11) = ClampValue(temp0[11] + temp0[20], range); + Unsafe.Add(ref 
output, 12) = ClampValue(temp0[12] + temp0[19], range); + Unsafe.Add(ref output, 13) = ClampValue(temp0[13] + temp0[18], range); + Unsafe.Add(ref output, 14) = ClampValue(temp0[14] + temp0[17], range); + Unsafe.Add(ref output, 15) = ClampValue(temp0[15] + temp0[16], range); + Unsafe.Add(ref output, 16) = ClampValue(temp0[15] - temp0[16], range); + Unsafe.Add(ref output, 17) = ClampValue(temp0[14] - temp0[17], range); + Unsafe.Add(ref output, 18) = ClampValue(temp0[13] - temp0[18], range); + Unsafe.Add(ref output, 19) = ClampValue(temp0[12] - temp0[19], range); + Unsafe.Add(ref output, 20) = ClampValue(temp0[11] - temp0[20], range); + Unsafe.Add(ref output, 21) = ClampValue(temp0[10] - temp0[21], range); + Unsafe.Add(ref output, 22) = ClampValue(temp0[9] - temp0[22], range); + Unsafe.Add(ref output, 23) = ClampValue(temp0[8] - temp0[23], range); + Unsafe.Add(ref output, 24) = ClampValue(temp0[7] - temp0[24], range); + Unsafe.Add(ref output, 25) = ClampValue(temp0[6] - temp0[25], range); + Unsafe.Add(ref output, 26) = ClampValue(temp0[5] - temp0[26], range); + Unsafe.Add(ref output, 27) = ClampValue(temp0[4] - temp0[27], range); + Unsafe.Add(ref output, 28) = ClampValue(temp0[3] - temp0[28], range); + Unsafe.Add(ref output, 29) = ClampValue(temp0[2] - temp0[29], range); + Unsafe.Add(ref output, 30) = ClampValue(temp0[1] - temp0[30], range); + Unsafe.Add(ref output, 31) = ClampValue(temp0[0] - temp0[31], range); + }
+
+    /// <summary>
+    /// Clamps <paramref name="value"/> to the range of a signed integer that is
+    /// <paramref name="bit"/> bits wide, i.e. [-2^(bit - 1), 2^(bit - 1) - 1].
+    /// </summary>
+    /// <param name="value">The value to clamp.</param>
+    /// <param name="bit">The bit width of the target range. A width of 0 disables clamping.</param>
+    /// <returns>
+    /// The clamped value, or <paramref name="value"/> unchanged when <paramref name="bit"/> is 0.
+    /// </returns>
+    internal static int ClampValue(int value, byte bit)
+    {
+        if (bit <= 0)
+        {
+            return value; // Do nothing for invalid clamp bit.
+        }
+
+        // The limits are built as longs, so a 32-bit width still yields valid bounds.
+        long max_value = (1L << (bit - 1)) - 1;
+        long min_value = -(1L << (bit - 1));
+        return (int)Av1Math.Clamp(value, min_value, max_value);
+    }
+
+    /// <summary>
+    /// Computes the butterfly term (w0 * in0 + w1 * in1), rounded half up and scaled down by 2^bit.
+    /// </summary>
+    /// <param name="w0">The weight applied to <paramref name="in0"/>.</param>
+    /// <param name="in0">The first input sample.</param>
+    /// <param name="w1">The weight applied to <paramref name="in1"/>.</param>
+    /// <param name="in1">The second input sample.</param>
+    /// <param name="bit">
+    /// The number of low bits shifted out after rounding. Expected to be at least 1;
+    /// a value of 0 is not handled specially.
+    /// </param>
+    /// <returns>The rounded, shifted sum, truncated to 32 bits.</returns>
+    internal static int HalfButterfly(int w0, int in0, int w1, int in1, int bit)
+    {
+        // Widen before multiplying. A product evaluated in 32-bit arithmetic wraps silently in the
+        // default unchecked context, and a wrapped product that is then summed and shifted as a
+        // 64-bit value no longer produces the rounded result. The wrapping shortcut described in
+        // the note below only holds when every step, including the shift, is done in 32 bits.
+        long result64 = ((long)w0 * in0) + ((long)w1 * in1);
+        long intermediate = result64 + (1L << (bit - 1));
+
+        // NOTE(david.barker): The value 'result_64' may not necessarily fit
+        // into 32 bits. However, the result of this function is nominally
+        // ROUND_POWER_OF_TWO_64(result_64, bit)
+        // and that is required to fit into range many bits
+        // (checked by range_check_buf()).
+        //
+        // Here we've unpacked that rounding operation, and it can be shown
+        // that the value of 'intermediate' here *does* fit into 32 bits
+        // for any conformant bitstream.
+        // The upshot is that, if you do all this calculation using
+        // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic,
+        // then you'll still get the correct result.
+        return (int)(intermediate >> bit);
+    }
+}
diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct4Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct4Inverse1dTransformer.cs index ca19e188b5..0c0cd25b6c 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct4Inverse1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct4Inverse1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse; -internal class Av1Dct4Inverse1dTransformer : IAv1Forward1dTransformer +internal class Av1Dct4Inverse1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { @@ -46,10 +46,11 @@ internal class Av1Dct4Inverse1dTransformer : IAv1Forward1dTransformer // stage 3 stage++; - Unsafe.Add(ref output, 0) = ClampValue(temp1[0] + temp1[3], stageRange[stage]); - Unsafe.Add(ref output, 1) = ClampValue(temp1[1] + temp1[2], stageRange[stage]); - Unsafe.Add(ref output, 2) = ClampValue(temp1[1] - temp1[2], stageRange[stage]); - Unsafe.Add(ref output, 3) = ClampValue(temp1[0] - temp1[3], stageRange[stage]); + Unsafe.Add(ref output, 0) = temp1[0] + temp1[3]; + Unsafe.Add(ref output, 1) = temp1[1] + temp1[2]; + Unsafe.Add(ref output, 2) = temp1[1] - temp1[2]; + Unsafe.Add(ref output, 3) = temp1[0] - temp1[3]; + ClampBuffer4(ref output, 
stageRange[stage]); } internal static int ClampValue(int value, byte bit) @@ -64,6 +65,114 @@ internal class Av1Dct4Inverse1dTransformer : IAv1Forward1dTransformer return (int)Av1Math.Clamp(value, min_value, max_value); }
+    /// <summary>
+    /// Clamps, in place, the 4 consecutive values starting at <paramref name="buffer"/> to the range
+    /// of a signed integer that is <paramref name="bit"/> bits wide.
+    /// </summary>
+    /// <param name="buffer">Reference to the first value. No bounds checking is performed.</param>
+    /// <param name="bit">Bit width of the target range; 0 leaves the values untouched.</param>
+    internal static void ClampBuffer4(ref int buffer, byte bit)
+    {
+        if (bit > 0)
+        {
+            long half = 1L << (bit - 1);
+            long lower = -half;
+            long upper = half - 1;
+            for (int i = 0; i < 4; i++)
+            {
+                ref int slot = ref Unsafe.Add(ref buffer, i);
+                slot = (int)Av1Math.Clamp(slot, lower, upper);
+            }
+        }
+    }
+
+    /// <summary>
+    /// Clamps, in place, the 8 consecutive values starting at <paramref name="buffer"/> to the range
+    /// of a signed integer that is <paramref name="bit"/> bits wide.
+    /// </summary>
+    /// <param name="buffer">Reference to the first value. No bounds checking is performed.</param>
+    /// <param name="bit">Bit width of the target range; 0 leaves the values untouched.</param>
+    internal static void ClampBuffer8(ref int buffer, byte bit)
+    {
+        if (bit > 0)
+        {
+            long half = 1L << (bit - 1);
+            long lower = -half;
+            long upper = half - 1;
+            for (int i = 0; i < 8; i++)
+            {
+                ref int slot = ref Unsafe.Add(ref buffer, i);
+                slot = (int)Av1Math.Clamp(slot, lower, upper);
+            }
+        }
+    }
+
+    /// <summary>
+    /// Clamps, in place, the 16 consecutive values starting at <paramref name="buffer"/> to the range
+    /// of a signed integer that is <paramref name="bit"/> bits wide.
+    /// </summary>
+    /// <param name="buffer">Reference to the first value. No bounds checking is performed.</param>
+    /// <param name="bit">Bit width of the target range; 0 leaves the values untouched.</param>
+    internal static void ClampBuffer16(ref int buffer, byte bit)
+    {
+        if (bit > 0)
+        {
+            long half = 1L << (bit - 1);
+            long lower = -half;
+            long upper = half - 1;
+            for (int i = 0; i < 16; i++)
+            {
+                ref int slot = ref Unsafe.Add(ref buffer, i);
+                slot = (int)Av1Math.Clamp(slot, lower, upper);
+            }
+        }
+    }
+
+    /// <summary>
+    /// Clamps, in place, the 32 consecutive values starting at <paramref name="buffer"/> to the range
+    /// of a signed integer that is <paramref name="bit"/> bits wide.
+    /// </summary>
+    /// <param name="buffer">Reference to the first value. No bounds checking is performed.</param>
+    /// <param name="bit">Bit width of the target range; 0 leaves the values untouched.</param>
+    internal static void ClampBuffer32(ref int buffer, byte bit)
+    {
+        if (bit > 0)
+        {
+            long half = 1L << (bit - 1);
+            long lower = -half;
+            long upper = half - 1;
+            for (int i = 0; i < 32; i++)
+            {
+                ref int slot = ref Unsafe.Add(ref buffer, i);
+                slot = (int)Av1Math.Clamp(slot, lower, upper);
+            }
+        }
+    }
+
 internal static int HalfButterfly(int w0, int in0, int w1, int in1, int bit) { long result64 = (long)(w0 * in0) + (w1 * in1); diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct64Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct64Inverse1dTransformer.cs new file mode 100644 index 0000000000..7218f5659e --- /dev/null +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct64Inverse1dTransformer.cs @@ -0,0 +1,827 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. 
+
+using System.Runtime.CompilerServices;
+
+namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse;
+
+// One-dimensional inverse transform over 64 coefficients; the work is done by TransformScalar.
+internal class Av1Dct64Inverse1dTransformer : IAv1Transformer1d
+{
+    // Checks that input and output each hold at least 64 elements, then hands references to
+    // their first elements to TransformScalar together with cosBit and stageRange.
+    //   input      - source coefficients; at least 64 elements are required.
+    //   output     - destination buffer; at least 64 elements are required.
+    //   cosBit     - forwarded unchanged; TransformScalar uses it to pick the cosine table and
+    //                passes it to HalfButterfly.
+    //   stageRange - forwarded unchanged; TransformScalar reads stageRange[stage] as the clamp
+    //                width of each stage. Its length is not validated here.
+    public void Transform(Span input, Span output, int cosBit, Span stageRange)
+    {
+        Guard.MustBeSizedAtLeast(input, 64, nameof(input));
+        Guard.MustBeSizedAtLeast(output, 64, nameof(output));
+        TransformScalar(ref input[0], ref output[0], cosBit, stageRange);
+    }
+
+ /// + /// SVT: svt_av1_idct64_new + /// + private static void TransformScalar(ref int input, ref int output, int cosBit, Span stageRange) + { + Span cospi = Av1SinusConstants.CosinusPi(cosBit); + int stage = 0; + Span temp0 = stackalloc int[64]; + Span temp1 = stackalloc int[64]; + + // stage 0; + + // stage 1; + stage++; + temp1[0] = Unsafe.Add(ref input, 0); + temp1[1] = Unsafe.Add(ref input, 32); + temp1[2] = Unsafe.Add(ref input, 16); + temp1[3] = Unsafe.Add(ref input, 48); + temp1[4] = Unsafe.Add(ref input, 8); + temp1[5] = Unsafe.Add(ref input, 40); + temp1[6] = Unsafe.Add(ref input, 24); + temp1[7] = Unsafe.Add(ref input, 56); + temp1[8] = Unsafe.Add(ref input, 4); + temp1[9] = Unsafe.Add(ref input, 36); + temp1[10] = Unsafe.Add(ref input, 20); + temp1[11] = Unsafe.Add(ref input, 52); + temp1[12] = Unsafe.Add(ref input, 12); + temp1[13] = Unsafe.Add(ref input, 44); + temp1[14] = Unsafe.Add(ref input, 28); + temp1[15] = Unsafe.Add(ref input, 60); + temp1[16] = Unsafe.Add(ref input, 2); + temp1[17] = Unsafe.Add(ref input, 34); + temp1[18] = Unsafe.Add(ref input, 18); + temp1[19] = Unsafe.Add(ref input, 50); + temp1[20] = Unsafe.Add(ref input, 10); + temp1[21] = Unsafe.Add(ref input, 42); + temp1[22] = Unsafe.Add(ref input, 26); + temp1[23] = Unsafe.Add(ref input, 58); + temp1[24] = Unsafe.Add(ref input, 6); + temp1[25] = Unsafe.Add(ref input, 38); + temp1[26] = Unsafe.Add(ref input, 22); + temp1[27] = Unsafe.Add(ref input, 54); + temp1[28] = Unsafe.Add(ref input, 14); + temp1[29] = Unsafe.Add(ref input, 46); + 
temp1[30] = Unsafe.Add(ref input, 30); + temp1[31] = Unsafe.Add(ref input, 62); + temp1[32] = Unsafe.Add(ref input, 1); + temp1[33] = Unsafe.Add(ref input, 33); + temp1[34] = Unsafe.Add(ref input, 17); + temp1[35] = Unsafe.Add(ref input, 49); + temp1[36] = Unsafe.Add(ref input, 9); + temp1[37] = Unsafe.Add(ref input, 41); + temp1[38] = Unsafe.Add(ref input, 25); + temp1[39] = Unsafe.Add(ref input, 57); + temp1[40] = Unsafe.Add(ref input, 5); + temp1[41] = Unsafe.Add(ref input, 37); + temp1[42] = Unsafe.Add(ref input, 21); + temp1[43] = Unsafe.Add(ref input, 53); + temp1[44] = Unsafe.Add(ref input, 13); + temp1[45] = Unsafe.Add(ref input, 45); + temp1[46] = Unsafe.Add(ref input, 29); + temp1[47] = Unsafe.Add(ref input, 61); + temp1[48] = Unsafe.Add(ref input, 3); + temp1[49] = Unsafe.Add(ref input, 35); + temp1[50] = Unsafe.Add(ref input, 19); + temp1[51] = Unsafe.Add(ref input, 51); + temp1[52] = Unsafe.Add(ref input, 11); + temp1[53] = Unsafe.Add(ref input, 43); + temp1[54] = Unsafe.Add(ref input, 27); + temp1[55] = Unsafe.Add(ref input, 59); + temp1[56] = Unsafe.Add(ref input, 7); + temp1[57] = Unsafe.Add(ref input, 39); + temp1[58] = Unsafe.Add(ref input, 23); + temp1[59] = Unsafe.Add(ref input, 55); + temp1[60] = Unsafe.Add(ref input, 15); + temp1[61] = Unsafe.Add(ref input, 47); + temp1[62] = Unsafe.Add(ref input, 31); + temp1[63] = Unsafe.Add(ref input, 63); + + // range_check_buf(stage, input, bf1, size, range); + + // stage 2 + stage++; + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + temp0[2] = temp1[2]; + temp0[3] = temp1[3]; + temp0[4] = temp1[4]; + temp0[5] = temp1[5]; + temp0[6] = temp1[6]; + temp0[7] = temp1[7]; + temp0[8] = temp1[8]; + temp0[9] = temp1[9]; + temp0[10] = temp1[10]; + temp0[11] = temp1[11]; + temp0[12] = temp1[12]; + temp0[13] = temp1[13]; + temp0[14] = temp1[14]; + temp0[15] = temp1[15]; + temp0[16] = temp1[16]; + temp0[17] = temp1[17]; + temp0[18] = temp1[18]; + temp0[19] = temp1[19]; + temp0[20] = temp1[20]; + temp0[21] = temp1[21]; 
+ temp0[22] = temp1[22]; + temp0[23] = temp1[23]; + temp0[24] = temp1[24]; + temp0[25] = temp1[25]; + temp0[26] = temp1[26]; + temp0[27] = temp1[27]; + temp0[28] = temp1[28]; + temp0[29] = temp1[29]; + temp0[30] = temp1[30]; + temp0[31] = temp1[31]; + temp0[32] = HalfButterfly(cospi[63], temp1[32], -cospi[1], temp1[63], cosBit); + temp0[33] = HalfButterfly(cospi[31], temp1[33], -cospi[33], temp1[63], cosBit); + temp0[34] = HalfButterfly(cospi[47], temp1[34], -cospi[17], temp1[61], cosBit); + temp0[35] = HalfButterfly(cospi[15], temp1[35], -cospi[49], temp1[60], cosBit); + temp0[36] = HalfButterfly(cospi[55], temp1[36], -cospi[9], temp1[59], cosBit); + temp0[37] = HalfButterfly(cospi[23], temp1[37], -cospi[41], temp1[58], cosBit); + temp0[38] = HalfButterfly(cospi[39], temp1[38], -cospi[25], temp1[57], cosBit); + temp0[39] = HalfButterfly(cospi[7], temp1[39], -cospi[57], temp1[56], cosBit); + temp0[40] = HalfButterfly(cospi[59], temp1[40], -cospi[5], temp1[55], cosBit); + temp0[41] = HalfButterfly(cospi[27], temp1[41], -cospi[37], temp1[54], cosBit); + temp0[42] = HalfButterfly(cospi[43], temp1[42], -cospi[21], temp1[53], cosBit); + temp0[43] = HalfButterfly(cospi[11], temp1[43], -cospi[53], temp1[52], cosBit); + temp0[44] = HalfButterfly(cospi[51], temp1[44], -cospi[13], temp1[51], cosBit); + temp0[45] = HalfButterfly(cospi[19], temp1[45], -cospi[45], temp1[50], cosBit); + temp0[46] = HalfButterfly(cospi[35], temp1[46], -cospi[29], temp1[49], cosBit); + temp0[47] = HalfButterfly(cospi[3], temp1[47], -cospi[61], temp1[48], cosBit); + temp0[48] = HalfButterfly(cospi[61], temp1[47], cospi[3], temp1[48], cosBit); + temp0[49] = HalfButterfly(cospi[29], temp1[46], cospi[35], temp1[49], cosBit); + temp0[50] = HalfButterfly(cospi[45], temp1[45], cospi[19], temp1[50], cosBit); + temp0[51] = HalfButterfly(cospi[13], temp1[44], cospi[51], temp1[51], cosBit); + temp0[52] = HalfButterfly(cospi[53], temp1[43], cospi[11], temp1[52], cosBit); + temp0[53] = HalfButterfly(cospi[21], 
temp1[42], cospi[43], temp1[53], cosBit); + temp0[54] = HalfButterfly(cospi[37], temp1[41], cospi[27], temp1[54], cosBit); + temp0[55] = HalfButterfly(cospi[5], temp1[40], cospi[59], temp1[55], cosBit); + temp0[56] = HalfButterfly(cospi[57], temp1[39], cospi[7], temp1[56], cosBit); + temp0[57] = HalfButterfly(cospi[25], temp1[38], cospi[39], temp1[57], cosBit); + temp0[58] = HalfButterfly(cospi[41], temp1[37], cospi[23], temp1[58], cosBit); + temp0[59] = HalfButterfly(cospi[9], temp1[36], cospi[55], temp1[59], cosBit); + temp0[60] = HalfButterfly(cospi[49], temp1[35], cospi[15], temp1[60], cosBit); + temp0[61] = HalfButterfly(cospi[17], temp1[34], cospi[47], temp1[61], cosBit); + temp0[62] = HalfButterfly(cospi[33], temp1[33], cospi[31], temp1[62], cosBit); + temp0[63] = HalfButterfly(cospi[1], temp1[32], cospi[63], temp1[63], cosBit); + + // range_check_buf(stage, input, bf1, size, range); + + // stage 3 + stage++; + byte range = stageRange[stage]; + temp1[0] = temp0[0]; + temp1[1] = temp0[1]; + temp1[2] = temp0[2]; + temp1[3] = temp0[3]; + temp1[4] = temp0[4]; + temp1[5] = temp0[5]; + temp1[6] = temp0[6]; + temp1[7] = temp0[7]; + temp1[8] = temp0[8]; + temp1[9] = temp0[9]; + temp1[10] = temp0[10]; + temp1[11] = temp0[11]; + temp1[12] = temp0[12]; + temp1[13] = temp0[13]; + temp1[14] = temp0[14]; + temp1[15] = temp0[15]; + temp1[16] = HalfButterfly(cospi[62], temp0[16], -cospi[2], temp0[31], cosBit); + temp1[17] = HalfButterfly(cospi[30], temp0[17], -cospi[34], temp0[30], cosBit); + temp1[18] = HalfButterfly(cospi[46], temp0[18], -cospi[18], temp0[29], cosBit); + temp1[19] = HalfButterfly(cospi[14], temp0[19], -cospi[50], temp0[28], cosBit); + temp1[20] = HalfButterfly(cospi[54], temp0[20], -cospi[10], temp0[27], cosBit); + temp1[21] = HalfButterfly(cospi[22], temp0[21], -cospi[42], temp0[26], cosBit); + temp1[22] = HalfButterfly(cospi[38], temp0[22], -cospi[26], temp0[25], cosBit); + temp1[23] = HalfButterfly(cospi[6], temp0[23], -cospi[58], temp0[24], cosBit); + 
temp1[24] = HalfButterfly(cospi[58], temp0[23], cospi[6], temp0[24], cosBit); + temp1[25] = HalfButterfly(cospi[26], temp0[22], cospi[38], temp0[25], cosBit); + temp1[26] = HalfButterfly(cospi[42], temp0[21], cospi[22], temp0[26], cosBit); + temp1[27] = HalfButterfly(cospi[10], temp0[20], cospi[54], temp0[27], cosBit); + temp1[28] = HalfButterfly(cospi[50], temp0[19], cospi[14], temp0[28], cosBit); + temp1[29] = HalfButterfly(cospi[18], temp0[18], cospi[46], temp0[29], cosBit); + temp1[30] = HalfButterfly(cospi[34], temp0[17], cospi[30], temp0[30], cosBit); + temp1[31] = HalfButterfly(cospi[2], temp0[16], cospi[62], temp0[31], cosBit); + temp1[32] = ClampValue(temp0[32] + temp0[33], range); + temp1[33] = ClampValue(temp0[32] - temp0[33], range); + temp1[34] = ClampValue(-temp0[34] + temp0[35], range); + temp1[35] = ClampValue(temp0[34] + temp0[35], range); + temp1[36] = ClampValue(temp0[36] + temp0[37], range); + temp1[37] = ClampValue(temp0[36] - temp0[37], range); + temp1[38] = ClampValue(-temp0[38] + temp0[39], range); + temp1[39] = ClampValue(temp0[38] + temp0[39], range); + temp1[40] = ClampValue(temp0[40] + temp0[41], range); + temp1[41] = ClampValue(temp0[40] - temp0[41], range); + temp1[42] = ClampValue(-temp0[42] + temp0[43], range); + temp1[43] = ClampValue(temp0[42] + temp0[43], range); + temp1[44] = ClampValue(temp0[44] + temp0[45], range); + temp1[45] = ClampValue(temp0[44] - temp0[45], range); + temp1[46] = ClampValue(-temp0[46] + temp0[47], range); + temp1[47] = ClampValue(temp0[46] + temp0[47], range); + temp1[48] = ClampValue(temp0[48] + temp0[49], range); + temp1[49] = ClampValue(temp0[48] - temp0[49], range); + temp1[50] = ClampValue(-temp0[50] + temp0[51], range); + temp1[51] = ClampValue(temp0[50] + temp0[51], range); + temp1[52] = ClampValue(temp0[52] + temp0[53], range); + temp1[53] = ClampValue(temp0[52] - temp0[53], range); + temp1[54] = ClampValue(-temp0[54] + temp0[55], range); + temp1[55] = ClampValue(temp0[54] + temp0[55], range); + 
temp1[56] = ClampValue(temp0[56] + temp0[57], range); + temp1[57] = ClampValue(temp0[56] - temp0[57], range); + temp1[58] = ClampValue(-temp0[58] + temp0[59], range); + temp1[59] = ClampValue(temp0[58] + temp0[59], range); + temp1[60] = ClampValue(temp0[60] + temp0[61], range); + temp1[61] = ClampValue(temp0[60] - temp0[61], range); + temp1[62] = ClampValue(-temp0[62] + temp0[63], range); + temp1[63] = ClampValue(temp0[62] + temp0[63], range); + + // range_check_buf(stage, input, bf1, size, range); + + // stage 4 + stage++; + range = stageRange[stage]; + temp0[0] = temp1[0]; + temp0[1] = temp1[1]; + temp0[2] = temp1[2]; + temp0[3] = temp1[3]; + temp0[4] = temp1[4]; + temp0[5] = temp1[5]; + temp0[6] = temp1[6]; + temp0[7] = temp1[7]; + temp0[8] = HalfButterfly(cospi[60], temp1[8], -cospi[4], temp1[15], cosBit); + temp0[9] = HalfButterfly(cospi[28], temp1[9], -cospi[36], temp1[14], cosBit); + temp0[10] = HalfButterfly(cospi[44], temp1[10], -cospi[20], temp1[13], cosBit); + temp0[11] = HalfButterfly(cospi[12], temp1[11], -cospi[52], temp0[12], cosBit); + temp0[12] = HalfButterfly(cospi[52], temp1[11], cospi[12], temp1[12], cosBit); + temp0[13] = HalfButterfly(cospi[20], temp1[10], cospi[44], temp1[13], cosBit); + temp0[14] = HalfButterfly(cospi[36], temp1[9], cospi[28], temp1[14], cosBit); + temp0[15] = HalfButterfly(cospi[4], temp1[8], cospi[60], temp0[15], cosBit); + temp0[16] = ClampValue(temp1[16] + temp1[17], range); + temp0[17] = ClampValue(temp1[16] - temp1[17], range); + temp0[18] = ClampValue(-temp1[18] + temp1[19], range); + temp0[19] = ClampValue(temp1[18] + temp1[19], range); + temp0[20] = ClampValue(temp1[20] + temp1[21], range); + temp0[21] = ClampValue(temp1[20] - temp1[21], range); + temp0[22] = ClampValue(-temp1[22] + temp1[23], range); + temp0[23] = ClampValue(temp1[22] + temp1[23], range); + temp0[24] = ClampValue(temp1[24] + temp1[25], range); + temp0[25] = ClampValue(temp1[24] - temp1[25], range); + temp0[26] = ClampValue(-temp1[26] + temp1[27], 
range); + temp0[27] = ClampValue(temp1[26] + temp1[27], range); + temp0[28] = ClampValue(temp1[28] + temp1[29], range); + temp0[29] = ClampValue(temp1[28] - temp1[29], range); + temp0[30] = ClampValue(-temp1[30] + temp1[31], range); + temp0[31] = ClampValue(temp1[30] + temp1[31], range); + temp0[32] = temp1[32]; + temp0[33] = HalfButterfly(-cospi[4], temp1[33], cospi[60], temp1[62], cosBit); + temp0[34] = HalfButterfly(-cospi[60], temp1[34], -cospi[4], temp1[61], cosBit); + temp0[35] = temp1[35]; + temp0[36] = temp1[36]; + temp0[37] = HalfButterfly(-cospi[36], temp1[37], cospi[28], temp1[58], cosBit); + temp0[38] = HalfButterfly(-cospi[28], temp1[38], -cospi[36], temp1[57], cosBit); + temp0[39] = temp1[39]; + temp0[40] = temp1[40]; + temp0[41] = HalfButterfly(-cospi[20], temp1[41], cospi[44], temp1[54], cosBit); + temp0[42] = HalfButterfly(-cospi[44], temp1[42], -cospi[20], temp1[53], cosBit); + temp0[43] = temp1[43]; + temp0[44] = temp1[44]; + temp0[45] = HalfButterfly(-cospi[52], temp1[45], cospi[12], temp1[50], cosBit); + temp0[46] = HalfButterfly(-cospi[12], temp1[46], -cospi[52], temp1[49], cosBit); + temp0[47] = temp1[47]; + temp0[48] = temp1[48]; + temp0[49] = HalfButterfly(-cospi[52], temp1[46], cospi[12], temp1[49], cosBit); + temp0[50] = HalfButterfly(cospi[12], temp1[45], cospi[52], temp1[50], cosBit); + temp0[51] = temp1[51]; + temp0[52] = temp1[52]; + temp0[53] = HalfButterfly(-cospi[20], temp1[42], cospi[44], temp1[53], cosBit); + temp0[54] = HalfButterfly(cospi[44], temp1[41], cospi[20], temp1[54], cosBit); + temp0[55] = temp1[55]; + temp0[56] = temp1[56]; + temp0[57] = HalfButterfly(-cospi[36], temp1[38], cospi[28], temp1[57], cosBit); + temp0[58] = HalfButterfly(cospi[28], temp1[37], cospi[36], temp1[58], cosBit); + temp0[59] = temp1[59]; + temp0[60] = temp1[60]; + temp0[61] = HalfButterfly(-cospi[4], temp1[34], cospi[60], temp1[61], cosBit); + temp0[62] = HalfButterfly(cospi[60], temp1[33], cospi[4], temp1[62], cosBit); + temp0[63] = temp1[63]; + 
+ // range_check_buf(stage, input, bf1, size, range); + + // stage 5 + stage++; + range = stageRange[stage]; + temp1[0] = temp0[0]; + temp1[1] = temp0[1]; + temp1[2] = temp0[2]; + temp1[3] = temp0[3]; + temp1[4] = HalfButterfly(cospi[56], temp0[4], -cospi[8], temp0[7], cosBit); + temp1[5] = HalfButterfly(cospi[24], temp0[5], -cospi[40], temp0[6], cosBit); + temp1[6] = HalfButterfly(cospi[40], temp0[5], cospi[24], temp0[6], cosBit); + temp1[7] = HalfButterfly(cospi[8], temp0[4], cospi[56], temp0[7], cosBit); + temp1[8] = ClampValue(temp0[8] + temp0[9], range); + temp1[9] = ClampValue(temp0[8] - temp0[9], range); + temp1[10] = ClampValue(-temp0[10] + temp0[11], range); + temp1[11] = ClampValue(temp0[10] + temp0[11], range); + temp1[12] = ClampValue(temp0[12] + temp0[13], range); + temp1[13] = ClampValue(temp0[12] - temp0[13], range); + temp1[14] = ClampValue(-temp0[14] + temp0[15], range); + temp1[15] = ClampValue(temp0[14] + temp0[15], range); + temp1[16] = temp0[16]; + temp1[17] = HalfButterfly(-cospi[8], temp0[17], cospi[56], temp0[30], cosBit); + temp1[18] = HalfButterfly(-cospi[56], temp0[18], -cospi[8], temp0[29], cosBit); + temp1[19] = temp0[19]; + temp1[20] = temp0[20]; + temp1[21] = HalfButterfly(-cospi[40], temp0[21], cospi[24], temp0[26], cosBit); + temp1[22] = HalfButterfly(-cospi[24], temp0[22], -cospi[40], temp0[25], cosBit); + temp1[23] = temp0[23]; + temp1[24] = temp0[24]; + temp1[25] = HalfButterfly(-cospi[40], temp0[22], cospi[24], temp0[25], cosBit); + temp1[26] = HalfButterfly(cospi[24], temp0[21], cospi[40], temp0[26], cosBit); + temp1[27] = temp0[27]; + temp1[28] = temp0[28]; + temp1[29] = HalfButterfly(-cospi[8], temp0[18], cospi[56], temp0[29], cosBit); + temp1[30] = HalfButterfly(cospi[56], temp0[17], cospi[8], temp0[30], cosBit); + temp1[31] = temp0[31]; + temp1[32] = ClampValue(temp0[32] + temp0[35], range); + temp1[33] = ClampValue(temp0[33] + temp0[34], range); + temp1[34] = ClampValue(temp0[33] - temp0[34], range); + temp1[35] = 
ClampValue(temp0[32] - temp0[35], range); + temp1[36] = ClampValue(-temp0[36] + temp0[39], range); + temp1[37] = ClampValue(-temp0[37] + temp0[38], range); + temp1[38] = ClampValue(temp0[37] + temp0[38], range); + temp1[39] = ClampValue(temp0[36] + temp0[39], range); + temp1[40] = ClampValue(temp0[40] + temp0[43], range); + temp1[41] = ClampValue(temp0[41] + temp0[42], range); + temp1[42] = ClampValue(temp0[41] - temp0[42], range); + temp1[43] = ClampValue(temp0[40] - temp0[43], range); + temp1[44] = ClampValue(-temp0[44] + temp0[47], range); + temp1[45] = ClampValue(-temp0[45] + temp0[46], range); + temp1[46] = ClampValue(temp0[45] + temp0[46], range); + temp1[47] = ClampValue(temp0[44] + temp0[47], range); + temp1[48] = ClampValue(temp0[48] + temp0[51], range); + temp1[49] = ClampValue(temp0[49] + temp0[50], range); + temp1[50] = ClampValue(temp0[49] - temp0[50], range); + temp1[51] = ClampValue(temp0[48] - temp0[51], range); + temp1[52] = ClampValue(-temp0[52] + temp0[55], range); + temp1[53] = ClampValue(-temp0[53] + temp0[54], range); + temp1[54] = ClampValue(temp0[53] + temp0[54], range); + temp1[55] = ClampValue(temp0[52] + temp0[55], range); + temp1[56] = ClampValue(temp0[56] + temp0[59], range); + temp1[57] = ClampValue(temp0[57] + temp0[58], range); + temp1[58] = ClampValue(temp0[57] - temp0[58], range); + temp1[59] = ClampValue(temp0[56] - temp0[59], range); + temp1[60] = ClampValue(-temp0[60] + temp0[63], range); + temp1[61] = ClampValue(-temp0[61] + temp0[62], range); + temp1[62] = ClampValue(temp0[61] + temp0[62], range); + temp1[63] = ClampValue(temp0[60] + temp0[63], range); + + // range_check_buf(stage, input, bf1, size, range); + + // stage 6 + stage++; + range = stageRange[stage]; + temp0[0] = HalfButterfly(cospi[32], temp1[0], cospi[32], temp1[1], cosBit); + temp0[1] = HalfButterfly(cospi[32], temp1[0], -cospi[32], temp1[1], cosBit); + temp0[2] = HalfButterfly(cospi[48], temp1[2], -cospi[16], temp1[3], cosBit); + temp0[3] = 
HalfButterfly(cospi[16], temp1[2], cospi[48], temp1[3], cosBit); + temp0[4] = ClampValue(temp1[4] + temp1[5], range); + temp0[5] = ClampValue(temp1[4] - temp1[5], range); + temp0[6] = ClampValue(-temp1[6] + temp1[7], range); + temp0[7] = ClampValue(temp1[6] + temp1[7], range); + temp0[8] = temp1[8]; + temp0[9] = HalfButterfly(-cospi[16], temp1[9], cospi[48], temp1[14], cosBit); + temp0[10] = HalfButterfly(-cospi[48], temp1[10], -cospi[16], temp1[13], cosBit); + temp0[11] = temp1[11]; + temp0[12] = temp1[12]; + temp0[13] = HalfButterfly(-cospi[16], temp1[10], cospi[48], temp1[13], cosBit); + temp0[14] = HalfButterfly(cospi[48], temp1[9], cospi[16], temp1[14], cosBit); + temp0[15] = temp1[5]; + temp0[16] = ClampValue(temp1[16] + temp1[19], range); + temp0[17] = ClampValue(temp1[17] + temp1[18], range); + temp0[18] = ClampValue(temp1[17] - temp1[18], range); + temp0[19] = ClampValue(temp1[16] - temp1[19], range); + temp0[20] = ClampValue(-temp1[20] + temp1[23], range); + temp0[21] = ClampValue(-temp1[21] + temp1[22], range); + temp0[22] = ClampValue(temp1[21] + temp1[22], range); + temp0[23] = ClampValue(temp1[20] + temp1[23], range); + temp0[24] = ClampValue(temp1[24] + temp1[27], range); + temp0[25] = ClampValue(temp1[25] + temp1[26], range); + temp0[26] = ClampValue(temp1[25] - temp1[26], range); + temp0[27] = ClampValue(temp1[24] - temp1[27], range); + temp0[28] = ClampValue(-temp1[28] + temp1[31], range); + temp0[29] = ClampValue(-temp1[29] + temp1[30], range); + temp0[30] = ClampValue(temp1[29] + temp1[30], range); + temp0[31] = ClampValue(temp1[28] + temp1[31], range); + temp0[32] = temp1[32]; + temp0[33] = temp1[33]; + temp0[34] = HalfButterfly(-cospi[8], temp1[34], cospi[56], temp1[61], cosBit); + temp0[35] = HalfButterfly(-cospi[8], temp1[35], cospi[56], temp1[60], cosBit); + temp0[36] = HalfButterfly(-cospi[56], temp1[36], -cospi[8], temp1[59], cosBit); + temp0[37] = HalfButterfly(-cospi[56], temp1[37], -cospi[8], temp1[58], cosBit); + temp0[38] = 
temp1[38]; + temp0[39] = temp1[39]; + temp0[40] = temp1[40]; + temp0[41] = temp1[41]; + temp0[42] = HalfButterfly(-cospi[40], temp1[42], cospi[24], temp1[53], cosBit); + temp0[43] = HalfButterfly(-cospi[40], temp1[43], cospi[24], temp1[53], cosBit); + temp0[44] = HalfButterfly(-cospi[24], temp1[44], -cospi[40], temp1[51], cosBit); + temp0[45] = HalfButterfly(-cospi[24], temp1[45], -cospi[40], temp1[50], cosBit); + temp0[46] = temp1[46]; + temp0[47] = temp1[47]; + temp0[48] = temp1[48]; + temp0[49] = temp1[49]; + temp0[50] = HalfButterfly(-cospi[40], temp1[45], cospi[24], temp1[50], cosBit); + temp0[51] = HalfButterfly(-cospi[40], temp1[44], cospi[24], temp1[51], cosBit); + temp0[52] = HalfButterfly(cospi[24], temp1[43], cospi[40], temp1[52], cosBit); + temp0[53] = HalfButterfly(cospi[24], temp1[42], cospi[40], temp1[53], cosBit); + temp0[54] = temp1[54]; + temp0[55] = temp1[55]; + temp0[56] = temp1[56]; + temp0[57] = temp1[57]; + temp0[58] = HalfButterfly(-cospi[8], temp1[37], cospi[56], temp1[58], cosBit); + temp0[59] = HalfButterfly(-cospi[8], temp1[36], cospi[56], temp1[59], cosBit); + temp0[60] = HalfButterfly(cospi[56], temp1[35], cospi[8], temp1[60], cosBit); + temp0[61] = HalfButterfly(cospi[56], temp1[34], cospi[8], temp1[61], cosBit); + temp0[62] = temp1[62]; + temp0[63] = temp1[63]; + + // range_check_buf(stage, input, bf1, size, range); + + // stage 7 + stage++; + range = stageRange[stage]; + temp1[0] = ClampValue(temp0[0] + temp0[3], range); + temp1[1] = ClampValue(temp0[1] + temp0[2], range); + temp1[2] = ClampValue(temp0[1] - temp0[2], range); + temp1[3] = ClampValue(temp0[0] - temp0[3], range); + temp1[4] = temp0[4]; + temp1[5] = HalfButterfly(-cospi[32], temp0[5], cospi[32], temp0[6], cosBit); + temp1[6] = HalfButterfly(cospi[32], temp0[5], cospi[32], temp0[6], cosBit); + temp1[7] = temp0[7]; + temp1[8] = ClampValue(temp0[8] + temp0[11], range); + temp1[9] = ClampValue(temp0[9] + temp0[10], range); + temp1[10] = ClampValue(temp0[9] - temp0[10], 
range); + temp1[11] = ClampValue(temp0[8] - temp0[11], range); + temp1[12] = ClampValue(-temp0[12] + temp0[15], range); + temp1[13] = ClampValue(-temp0[13] + temp0[14], range); + temp1[14] = ClampValue(temp0[13] + temp0[14], range); + temp1[15] = ClampValue(temp0[12] + temp0[15], range); + temp1[16] = temp0[16]; + temp1[17] = temp0[17]; + temp1[18] = HalfButterfly(-cospi[16], temp0[18], cospi[48], temp0[29], cosBit); + temp1[19] = HalfButterfly(-cospi[16], temp0[19], cospi[48], temp0[28], cosBit); + temp1[20] = HalfButterfly(-cospi[48], temp0[20], -cospi[16], temp0[27], cosBit); + temp1[21] = HalfButterfly(-cospi[48], temp0[21], -cospi[16], temp0[26], cosBit); + temp1[22] = temp0[22]; + temp1[23] = temp0[23]; + temp1[24] = temp0[24]; + temp1[25] = temp0[25]; + temp1[26] = HalfButterfly(-cospi[16], temp0[21], cospi[48], temp0[26], cosBit); + temp1[27] = HalfButterfly(-cospi[16], temp0[20], cospi[48], temp0[27], cosBit); + temp1[28] = HalfButterfly(cospi[48], temp0[19], cospi[16], temp0[28], cosBit); + temp1[29] = HalfButterfly(cospi[48], temp0[18], cospi[16], temp0[29], cosBit); + temp1[30] = temp0[30]; + temp1[31] = temp0[31]; + temp1[32] = ClampValue(temp0[32] + temp0[39], range); + temp1[33] = ClampValue(temp0[33] + temp0[38], range); + temp1[34] = ClampValue(temp0[34] + temp0[37], range); + temp1[35] = ClampValue(temp0[35] + temp0[36], range); + temp1[36] = ClampValue(temp0[35] - temp0[36], range); + temp1[37] = ClampValue(temp0[34] - temp0[37], range); + temp1[38] = ClampValue(temp0[33] - temp0[38], range); + temp1[39] = ClampValue(temp0[32] - temp0[39], range); + temp1[40] = ClampValue(-temp0[40] + temp0[47], range); + temp1[41] = ClampValue(-temp0[41] + temp0[46], range); + temp1[42] = ClampValue(-temp0[42] + temp0[45], range); + temp1[43] = ClampValue(-temp0[43] + temp0[44], range); + temp1[44] = ClampValue(temp0[43] + temp0[44], range); + temp1[45] = ClampValue(temp0[42] + temp0[45], range); + temp1[46] = ClampValue(temp0[41] + temp0[46], range); + 
temp1[47] = ClampValue(temp0[40] + temp0[47], range); + temp1[48] = ClampValue(temp0[48] + temp0[55], range); + temp1[49] = ClampValue(temp0[49] + temp0[54], range); + temp1[50] = ClampValue(temp0[50] + temp0[53], range); + temp1[51] = ClampValue(temp0[51] + temp0[52], range); + temp1[52] = ClampValue(temp0[51] - temp0[52], range); + temp1[53] = ClampValue(temp0[50] - temp0[53], range); + temp1[54] = ClampValue(temp0[49] - temp0[54], range); + temp1[55] = ClampValue(temp0[48] - temp0[55], range); + temp1[56] = ClampValue(-temp0[56] + temp0[63], range); + temp1[57] = ClampValue(-temp0[57] + temp0[62], range); + temp1[58] = ClampValue(-temp0[58] + temp0[61], range); + temp1[59] = ClampValue(-temp0[59] + temp0[60], range); + temp1[60] = ClampValue(temp0[59] + temp0[60], range); + temp1[61] = ClampValue(temp0[58] + temp0[61], range); + temp1[62] = ClampValue(temp0[57] + temp0[62], range); + temp1[63] = ClampValue(temp0[56] + temp0[63], range); + + // range_check_buf(stage, input, bf1, size, range); + + // stage 8 + stage++; + range = stageRange[stage]; + temp0[0] = ClampValue(temp1[0] + temp1[7], range); + temp0[1] = ClampValue(temp1[1] + temp1[6], range); + temp0[2] = ClampValue(temp1[2] + temp1[5], range); + temp0[3] = ClampValue(temp1[3] + temp1[4], range); + temp0[4] = ClampValue(temp1[3] - temp1[4], range); + temp0[5] = ClampValue(temp1[2] - temp1[5], range); + temp0[6] = ClampValue(temp1[1] - temp1[6], range); + temp0[7] = ClampValue(temp1[0] - temp1[7], range); + temp0[8] = temp1[8]; + temp0[9] = temp1[9]; + temp0[10] = HalfButterfly(-cospi[32], temp1[10], cospi[32], temp1[13], cosBit); + temp0[11] = HalfButterfly(-cospi[32], temp1[11], cospi[32], temp1[12], cosBit); + temp0[12] = HalfButterfly(cospi[32], temp1[11], cospi[32], temp1[12], cosBit); + temp0[13] = HalfButterfly(cospi[32], temp1[10], cospi[32], temp1[13], cosBit); + temp0[14] = temp1[14]; + temp0[15] = temp1[15]; + temp0[16] = ClampValue(temp1[16] + temp1[23], range); + temp0[17] = 
ClampValue(temp1[17] + temp1[22], range); + temp0[18] = ClampValue(temp1[18] + temp1[21], range); + temp0[19] = ClampValue(temp1[19] + temp1[20], range); + temp0[20] = ClampValue(temp1[19] - temp1[20], range); + temp0[21] = ClampValue(temp1[18] - temp1[21], range); + temp0[22] = ClampValue(temp1[17] - temp1[22], range); + temp0[23] = ClampValue(temp1[16] - temp1[23], range); + temp0[24] = ClampValue(-temp1[24] + temp1[31], range); + temp0[25] = ClampValue(-temp1[25] + temp1[30], range); + temp0[26] = ClampValue(-temp1[26] + temp1[29], range); + temp0[27] = ClampValue(-temp1[27] + temp1[28], range); + temp0[28] = ClampValue(temp1[27] + temp1[28], range); + temp0[29] = ClampValue(temp1[26] + temp1[29], range); + temp0[30] = ClampValue(temp1[25] + temp1[30], range); + temp0[31] = ClampValue(temp1[24] + temp1[31], range); + temp0[32] = temp1[32]; + temp0[33] = temp1[33]; + temp0[34] = temp1[34]; + temp0[35] = temp1[35]; + temp0[36] = HalfButterfly(-cospi[16], temp1[36], cospi[48], temp1[59], cosBit); + temp0[37] = HalfButterfly(-cospi[16], temp1[47], cospi[48], temp1[58], cosBit); + temp0[38] = HalfButterfly(-cospi[16], temp1[48], cospi[48], temp1[57], cosBit); + temp0[39] = HalfButterfly(-cospi[16], temp1[49], cospi[48], temp1[56], cosBit); + temp0[40] = HalfButterfly(-cospi[16], temp1[40], -cospi[16], temp1[55], cosBit); + temp0[41] = HalfButterfly(-cospi[16], temp1[41], -cospi[16], temp1[54], cosBit); + temp0[42] = HalfButterfly(-cospi[16], temp1[42], -cospi[16], temp1[53], cosBit); + temp0[43] = HalfButterfly(-cospi[16], temp1[43], -cospi[16], temp1[52], cosBit); + temp0[44] = temp1[44]; + temp0[45] = temp1[45]; + temp0[46] = temp1[46]; + temp0[47] = temp1[47]; + temp0[48] = temp1[48]; + temp0[49] = temp1[49]; + temp0[50] = temp1[50]; + temp0[51] = temp1[51]; + temp0[52] = HalfButterfly(-cospi[16], temp1[43], cospi[48], temp1[52], cosBit); + temp0[53] = HalfButterfly(-cospi[16], temp1[42], cospi[48], temp1[53], cosBit); + temp0[54] = HalfButterfly(-cospi[16], 
temp1[41], cospi[48], temp1[54], cosBit); + temp0[55] = HalfButterfly(-cospi[16], temp1[40], cospi[48], temp1[55], cosBit); + temp0[56] = HalfButterfly(cospi[48], temp1[39], cospi[16], temp1[56], cosBit); + temp0[57] = HalfButterfly(cospi[48], temp1[38], cospi[16], temp1[57], cosBit); + temp0[58] = HalfButterfly(cospi[48], temp1[37], cospi[16], temp1[58], cosBit); + temp0[59] = HalfButterfly(cospi[48], temp1[36], cospi[16], temp1[59], cosBit); + temp0[60] = temp1[60]; + temp0[61] = temp1[61]; + temp0[62] = temp1[62]; + temp0[63] = temp1[63]; + + // range_check_buf(stage, input, bf1, size, range); + + // stage 9 + stage++; + range = stageRange[stage]; + temp1[0] = ClampValue(temp0[0] + temp0[15], range); + temp1[1] = ClampValue(temp0[1] + temp0[14], range); + temp1[2] = ClampValue(temp0[2] + temp0[13], range); + temp1[3] = ClampValue(temp0[3] + temp0[12], range); + temp1[4] = ClampValue(temp0[4] + temp0[11], range); + temp1[5] = ClampValue(temp0[5] + temp0[10], range); + temp1[6] = ClampValue(temp0[6] + temp0[9], range); + temp1[7] = ClampValue(temp0[7] + temp0[8], range); + temp1[8] = ClampValue(temp0[7] - temp0[8], range); + temp1[9] = ClampValue(temp0[6] - temp0[9], range); + temp1[10] = ClampValue(temp0[5] - temp0[10], range); + temp1[11] = ClampValue(temp0[4] - temp0[11], range); + temp1[12] = ClampValue(temp0[3] - temp0[12], range); + temp1[13] = ClampValue(temp0[2] - temp0[13], range); + temp1[14] = ClampValue(temp0[1] - temp0[14], range); + temp1[15] = ClampValue(temp0[0] - temp0[15], range); + temp1[16] = temp0[16]; + temp1[17] = temp0[17]; + temp1[18] = temp0[18]; + temp1[19] = temp0[19]; + temp1[20] = HalfButterfly(-cospi[32], temp0[20], cospi[32], temp0[27], cosBit); + temp1[21] = HalfButterfly(-cospi[32], temp0[21], cospi[32], temp0[26], cosBit); + temp1[22] = HalfButterfly(-cospi[32], temp0[22], cospi[32], temp0[25], cosBit); + temp1[23] = HalfButterfly(-cospi[32], temp0[23], cospi[32], temp0[24], cosBit); + temp1[24] = HalfButterfly(cospi[32], 
temp0[23], cospi[32], temp0[24], cosBit); + temp1[25] = HalfButterfly(cospi[32], temp0[22], cospi[32], temp0[25], cosBit); + temp1[26] = HalfButterfly(cospi[32], temp0[21], cospi[32], temp0[26], cosBit); + temp1[27] = HalfButterfly(cospi[32], temp0[20], cospi[32], temp0[27], cosBit); + temp1[28] = temp0[28]; + temp1[29] = temp0[28]; + temp1[30] = temp0[30]; + temp1[31] = temp0[31]; + temp1[32] = ClampValue(temp0[32] + temp0[47], range); + temp1[33] = ClampValue(temp0[33] + temp0[46], range); + temp1[34] = ClampValue(temp0[34] + temp0[45], range); + temp1[35] = ClampValue(temp0[35] + temp0[44], range); + temp1[36] = ClampValue(temp0[36] + temp0[43], range); + temp1[37] = ClampValue(temp0[37] + temp0[42], range); + temp1[38] = ClampValue(temp0[38] + temp0[41], range); + temp1[39] = ClampValue(temp0[39] + temp0[40], range); + temp1[40] = ClampValue(temp0[39] - temp0[40], range); + temp1[41] = ClampValue(temp0[38] - temp0[41], range); + temp1[42] = ClampValue(temp0[37] - temp0[42], range); + temp1[43] = ClampValue(temp0[36] - temp0[43], range); + temp1[44] = ClampValue(temp0[35] - temp0[44], range); + temp1[45] = ClampValue(temp0[34] - temp0[45], range); + temp1[46] = ClampValue(temp0[33] - temp0[46], range); + temp1[47] = ClampValue(temp0[32] - temp0[47], range); + temp1[48] = ClampValue(-temp0[48] + temp0[63], range); + temp1[49] = ClampValue(-temp0[49] + temp0[63], range); + temp1[50] = ClampValue(-temp0[50] + temp0[61], range); + temp1[51] = ClampValue(-temp0[51] + temp0[60], range); + temp1[52] = ClampValue(-temp0[52] + temp0[59], range); + temp1[53] = ClampValue(-temp0[53] + temp0[58], range); + temp1[54] = ClampValue(-temp0[54] + temp0[57], range); + temp1[55] = ClampValue(-temp0[55] + temp0[56], range); + temp1[56] = ClampValue(temp0[55] + temp0[56], range); + temp1[57] = ClampValue(temp0[54] + temp0[57], range); + temp1[58] = ClampValue(temp0[53] + temp0[58], range); + temp1[59] = ClampValue(temp0[52] + temp0[59], range); + temp1[60] = ClampValue(temp0[51] + 
temp0[60], range); + temp1[61] = ClampValue(temp0[50] + temp0[61], range); + temp1[62] = ClampValue(temp0[49] + temp0[62], range); + temp1[63] = ClampValue(temp0[48] + temp0[63], range); + + // range_check_buf(stage, input, bf1, size, range); + + // stage 10 + stage++; + range = stageRange[stage]; + temp0[0] = ClampValue(temp1[0] + temp1[31], range); + temp0[1] = ClampValue(temp1[1] + temp1[30], range); + temp0[2] = ClampValue(temp1[2] + temp1[29], range); + temp0[3] = ClampValue(temp1[3] + temp1[28], range); + temp0[4] = ClampValue(temp1[4] + temp1[27], range); + temp0[5] = ClampValue(temp1[5] + temp1[26], range); + temp0[6] = ClampValue(temp1[6] + temp1[25], range); + temp0[7] = ClampValue(temp1[7] + temp1[24], range); + temp0[8] = ClampValue(temp1[8] + temp1[23], range); + temp0[9] = ClampValue(temp1[9] + temp1[22], range); + temp0[10] = ClampValue(temp1[10] + temp1[21], range); + temp0[11] = ClampValue(temp1[11] + temp1[20], range); + temp0[12] = ClampValue(temp1[12] + temp1[19], range); + temp0[13] = ClampValue(temp1[13] + temp1[18], range); + temp0[14] = ClampValue(temp1[14] + temp1[17], range); + temp0[15] = ClampValue(temp1[15] + temp1[16], range); + temp0[16] = ClampValue(temp1[15] - temp1[16], range); + temp0[17] = ClampValue(temp1[14] - temp1[17], range); + temp0[18] = ClampValue(temp1[13] - temp1[18], range); + temp0[19] = ClampValue(temp1[12] - temp1[19], range); + temp0[20] = ClampValue(temp1[11] - temp1[20], range); + temp0[21] = ClampValue(temp1[10] - temp1[21], range); + temp0[22] = ClampValue(temp1[9] - temp1[22], range); + temp0[23] = ClampValue(temp1[8] - temp1[23], range); + temp0[24] = ClampValue(temp1[7] - temp1[24], range); + temp0[25] = ClampValue(temp1[6] - temp1[25], range); + temp0[26] = ClampValue(temp1[5] - temp1[26], range); + temp0[27] = ClampValue(temp1[4] - temp1[27], range); + temp0[28] = ClampValue(temp1[3] - temp1[28], range); + temp0[29] = ClampValue(temp1[2] - temp1[29], range); + temp0[30] = ClampValue(temp1[1] - temp1[30], 
range); + temp0[31] = ClampValue(temp1[0] - temp1[31], range); + temp0[32] = temp1[32]; + temp0[33] = temp1[33]; + temp0[34] = temp1[34]; + temp0[35] = temp1[35]; + temp0[36] = temp1[36]; + temp0[37] = temp1[37]; + temp0[38] = temp1[38]; + temp0[39] = temp1[39]; + temp0[40] = HalfButterfly(-cospi[32], temp1[40], cospi[32], temp1[55], cosBit); + temp0[41] = HalfButterfly(-cospi[32], temp1[41], cospi[32], temp1[54], cosBit); + temp0[42] = HalfButterfly(-cospi[32], temp1[42], cospi[32], temp1[53], cosBit); + temp0[43] = HalfButterfly(-cospi[32], temp1[43], cospi[32], temp1[52], cosBit); + temp0[44] = HalfButterfly(-cospi[32], temp1[44], cospi[32], temp1[51], cosBit); + temp0[45] = HalfButterfly(-cospi[32], temp1[45], cospi[32], temp1[50], cosBit); + temp0[46] = HalfButterfly(-cospi[32], temp1[46], cospi[32], temp1[49], cosBit); + temp0[47] = HalfButterfly(-cospi[32], temp1[47], cospi[32], temp1[48], cosBit); + temp0[48] = HalfButterfly(cospi[32], temp1[47], cospi[32], temp1[48], cosBit); + temp0[49] = HalfButterfly(cospi[32], temp1[46], cospi[32], temp1[49], cosBit); + temp0[50] = HalfButterfly(cospi[32], temp1[45], cospi[32], temp1[50], cosBit); + temp0[51] = HalfButterfly(cospi[32], temp1[44], cospi[32], temp1[51], cosBit); + temp0[52] = HalfButterfly(cospi[32], temp1[43], cospi[32], temp1[52], cosBit); + temp0[53] = HalfButterfly(cospi[32], temp1[42], cospi[32], temp1[53], cosBit); + temp0[54] = HalfButterfly(cospi[32], temp1[41], cospi[32], temp1[54], cosBit); + temp0[55] = HalfButterfly(cospi[32], temp1[40], cospi[32], temp1[55], cosBit); + temp0[56] = temp1[56]; + temp0[57] = temp1[57]; + temp0[58] = temp1[58]; + temp0[59] = temp1[59]; + temp0[60] = temp1[60]; + temp0[61] = temp1[61]; + temp0[62] = temp1[62]; + temp0[63] = temp1[63]; + + // range_check_buf(stage, input, bf1, size, range); + + // stage 11 + stage++; + range = stageRange[stage]; + Unsafe.Add(ref output, 0) = ClampValue(temp0[0] + temp0[63], range); + Unsafe.Add(ref output, 1) = ClampValue(temp0[1] 
+ temp0[62], range); + Unsafe.Add(ref output, 2) = ClampValue(temp0[2] + temp0[61], range); + Unsafe.Add(ref output, 3) = ClampValue(temp0[3] + temp0[60], range); + Unsafe.Add(ref output, 4) = ClampValue(temp0[4] + temp0[59], range); + Unsafe.Add(ref output, 5) = ClampValue(temp0[5] + temp0[58], range); + Unsafe.Add(ref output, 6) = ClampValue(temp0[6] + temp0[57], range); + Unsafe.Add(ref output, 7) = ClampValue(temp0[7] + temp0[56], range); + Unsafe.Add(ref output, 8) = ClampValue(temp0[8] + temp0[55], range); + Unsafe.Add(ref output, 9) = ClampValue(temp0[9] + temp0[54], range); + Unsafe.Add(ref output, 10) = ClampValue(temp0[10] + temp0[53], range); + Unsafe.Add(ref output, 11) = ClampValue(temp0[11] + temp0[52], range); + Unsafe.Add(ref output, 12) = ClampValue(temp0[12] + temp0[51], range); + Unsafe.Add(ref output, 13) = ClampValue(temp0[13] + temp0[50], range); + Unsafe.Add(ref output, 14) = ClampValue(temp0[14] + temp0[49], range); + Unsafe.Add(ref output, 15) = ClampValue(temp0[15] + temp0[48], range); + Unsafe.Add(ref output, 16) = ClampValue(temp0[16] + temp0[47], range); + Unsafe.Add(ref output, 17) = ClampValue(temp0[17] + temp0[46], range); + Unsafe.Add(ref output, 18) = ClampValue(temp0[18] + temp0[45], range); + Unsafe.Add(ref output, 19) = ClampValue(temp0[19] + temp0[44], range); + Unsafe.Add(ref output, 20) = ClampValue(temp0[20] + temp0[43], range); + Unsafe.Add(ref output, 21) = ClampValue(temp0[21] + temp0[42], range); + Unsafe.Add(ref output, 22) = ClampValue(temp0[22] + temp0[41], range); + Unsafe.Add(ref output, 23) = ClampValue(temp0[23] + temp0[40], range); + Unsafe.Add(ref output, 24) = ClampValue(temp0[24] + temp0[39], range); + Unsafe.Add(ref output, 25) = ClampValue(temp0[25] + temp0[38], range); + Unsafe.Add(ref output, 26) = ClampValue(temp0[26] + temp0[37], range); + Unsafe.Add(ref output, 27) = ClampValue(temp0[27] + temp0[36], range); + Unsafe.Add(ref output, 28) = ClampValue(temp0[27] + temp0[35], range); + Unsafe.Add(ref 
output, 29) = ClampValue(temp0[29] + temp0[34], range); + Unsafe.Add(ref output, 30) = ClampValue(temp0[30] + temp0[33], range); + Unsafe.Add(ref output, 31) = ClampValue(temp0[31] + temp0[32], range); + Unsafe.Add(ref output, 32) = ClampValue(temp0[31] - temp0[32], range); + Unsafe.Add(ref output, 33) = ClampValue(temp0[30] - temp0[33], range); + Unsafe.Add(ref output, 34) = ClampValue(temp0[29] - temp0[34], range); + Unsafe.Add(ref output, 35) = ClampValue(temp0[28] - temp0[35], range); + Unsafe.Add(ref output, 36) = ClampValue(temp0[27] - temp0[36], range); + Unsafe.Add(ref output, 37) = ClampValue(temp0[26] - temp0[37], range); + Unsafe.Add(ref output, 38) = ClampValue(temp0[25] - temp0[38], range); + Unsafe.Add(ref output, 39) = ClampValue(temp0[24] - temp0[39], range); + Unsafe.Add(ref output, 40) = ClampValue(temp0[23] - temp0[40], range); + Unsafe.Add(ref output, 41) = ClampValue(temp0[22] - temp0[41], range); + Unsafe.Add(ref output, 42) = ClampValue(temp0[21] - temp0[42], range); + Unsafe.Add(ref output, 43) = ClampValue(temp0[20] - temp0[43], range); + Unsafe.Add(ref output, 44) = ClampValue(temp0[19] - temp0[44], range); + Unsafe.Add(ref output, 45) = ClampValue(temp0[18] - temp0[45], range); + Unsafe.Add(ref output, 46) = ClampValue(temp0[17] - temp0[46], range); + Unsafe.Add(ref output, 47) = ClampValue(temp0[16] - temp0[47], range); + Unsafe.Add(ref output, 48) = ClampValue(temp0[15] - temp0[48], range); + Unsafe.Add(ref output, 49) = ClampValue(temp0[14] - temp0[49], range); + Unsafe.Add(ref output, 50) = ClampValue(temp0[13] - temp0[50], range); + Unsafe.Add(ref output, 51) = ClampValue(temp0[12] - temp0[51], range); + Unsafe.Add(ref output, 52) = ClampValue(temp0[11] - temp0[52], range); + Unsafe.Add(ref output, 53) = ClampValue(temp0[10] - temp0[53], range); + Unsafe.Add(ref output, 54) = ClampValue(temp0[9] - temp0[54], range); + Unsafe.Add(ref output, 55) = ClampValue(temp0[8] - temp0[55], range); + Unsafe.Add(ref output, 56) = 
ClampValue(temp0[7] - temp0[56], range); + Unsafe.Add(ref output, 57) = ClampValue(temp0[6] - temp0[57], range); + Unsafe.Add(ref output, 58) = ClampValue(temp0[5] - temp0[58], range); + Unsafe.Add(ref output, 59) = ClampValue(temp0[4] - temp0[59], range); + Unsafe.Add(ref output, 60) = ClampValue(temp0[3] - temp0[60], range); + Unsafe.Add(ref output, 61) = ClampValue(temp0[2] - temp0[61], range); + Unsafe.Add(ref output, 62) = ClampValue(temp0[1] - temp0[62], range); + Unsafe.Add(ref output, 63) = ClampValue(temp0[0] - temp0[63], range); + }
+
+    /// <summary>
+    /// Clamps <paramref name="value"/> to the range of a signed integer that is <paramref name="bit"/> bits wide.
+    /// </summary>
+    /// <param name="value">The value to clamp.</param>
+    /// <param name="bit">The width in bits of the signed target range.</param>
+    /// <returns>The clamped value, or <paramref name="value"/> unchanged when no clamping applies.</returns>
+    internal static int ClampValue(int value, byte bit)
+    {
+        // Zero is an invalid clamp bit, and a width of 32 bits or more can never narrow an int.
+        // Returning early for wide values also keeps the shift count below from wrapping around.
+        if (bit is 0 or >= 32)
+        {
+            return value;
+        }
+
+        int maxValue = (1 << (bit - 1)) - 1;
+        int minValue = -(1 << (bit - 1));
+        return Av1Math.Clamp(value, minValue, maxValue);
+    }
+
+    /// <summary>
+    /// Calculates <c>(w0 * in0) + (w1 * in1)</c>, rounded and shifted right by <paramref name="bit"/> bits.
+    /// </summary>
+    /// <param name="w0">The weight applied to <paramref name="in0"/>.</param>
+    /// <param name="in0">The first input value.</param>
+    /// <param name="w1">The weight applied to <paramref name="in1"/>.</param>
+    /// <param name="in1">The second input value.</param>
+    /// <param name="bit">The number of bits to shift right after rounding.</param>
+    /// <returns>The rounded and shifted sum, truncated to 32 bits.</returns>
+    internal static int HalfButterfly(int w0, int in0, int w1, int in1, int bit)
+    {
+        // The products are formed in 32-bit arithmetic; the note below explains why wrapping is acceptable.
+        // 'unchecked' keeps that behavior regardless of the project's overflow-checking setting.
+        long result64 = unchecked((long)(w0 * in0) + (w1 * in1));
+        long intermediate = result64 + (1L << (bit - 1));
+
+        // NOTE(david.barker): The value 'result_64' may not necessarily fit
+        // into 32 bits. However, the result of this function is nominally
+        // ROUND_POWER_OF_TWO_64(result_64, bit)
+        // and that is required to fit into range many bits
+        // (checked by range_check_buf()).
+        //
+        // Here we've unpacked that rounding operation, and it can be shown
+        // that the value of 'intermediate' here *does* fit into 32 bits
+        // for any conformant bitstream.
+        // The upshot is that, if you do all this calculation using
+        // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic,
+        // then you'll still get the correct result.
+ return (int)(intermediate >> bit); + } +}
diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct8Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct8Inverse1dTransformer.cs
new file mode 100644
index 0000000000..133d0e9e59
--- /dev/null
+++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Dct8Inverse1dTransformer.cs
@@ -0,0 +1,157 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+
+namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse;
+
+/// <summary>
+/// 8-point inverse DCT for the AV1 inverse transform, ported from SVT's <c>svt_av1_idct8_new</c>.
+/// </summary>
+internal class Av1Dct8Inverse1dTransformer : IAv1Transformer1d
+{
+    /// <summary>
+    /// Runs the 8-point inverse DCT on the first 8 values of <paramref name="input"/>.
+    /// </summary>
+    /// <param name="input">The coefficients to transform; must hold at least 8 values.</param>
+    /// <param name="output">Receives the 8 transformed values; must hold at least 8 values.</param>
+    /// <param name="cosBit">Passed to the cosine table lookup and used as the right shift of every butterfly.</param>
+    /// <param name="stageRange">The clamp width in bits for each stage; entries 3, 4 and 5 are read.</param>
+    public void Transform(Span<int> input, Span<int> output, int cosBit, Span<byte> stageRange)
+    {
+        Guard.MustBeSizedAtLeast(input, 8, nameof(input));
+        Guard.MustBeSizedAtLeast(output, 8, nameof(output));
+        TransformScalar(ref input[0], ref output[0], cosBit, stageRange);
+    }
+
+    /// <summary>
+    /// SVT: svt_av1_idct8_new
+    /// </summary>
+    private static void TransformScalar(ref int input, ref int output, int cosBit, Span<byte> stageRange)
+    {
+        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
+        int stage = 0;
+        Span<int> temp0 = stackalloc int[8];
+        Span<int> temp1 = stackalloc int[8];
+
+        // stage 0;
+
+        // stage 1; loads the input in the permuted order 0, 4, 2, 6, 1, 5, 3, 7.
+        stage++;
+        temp0[0] = input;
+        temp0[1] = Unsafe.Add(ref input, 4);
+        temp0[2] = Unsafe.Add(ref input, 2);
+        temp0[3] = Unsafe.Add(ref input, 6);
+        temp0[4] = Unsafe.Add(ref input, 1);
+        temp0[5] = Unsafe.Add(ref input, 5);
+        temp0[6] = Unsafe.Add(ref input, 3);
+        temp0[7] = Unsafe.Add(ref input, 7);
+
+        // range_check_buf(stage, input, bf1, size, stage_range[stage]);
+
+        // stage 2
+        stage++;
+        temp1[0] = temp0[0];
+        temp1[1] = temp0[1];
+        temp1[2] = temp0[2];
+        temp1[3] = temp0[3];
+        // The butterfly for temp1[4] pairs with the one for temp1[7]: both use cospi[8] and cospi[56].
+        temp1[4] = HalfButterfly(cospi[56], temp0[4], -cospi[8], temp0[7], cosBit);
+        temp1[5] = HalfButterfly(cospi[24], temp0[5], -cospi[40], temp0[6], cosBit);
+        temp1[6] = HalfButterfly(cospi[40], temp0[5], cospi[24], temp0[6], cosBit);
+        temp1[7] = HalfButterfly(cospi[8], temp0[4], cospi[56], temp0[7], cosBit);
+
+        // range_check_buf(stage, input, bf1, size, stage_range[stage]);
+
+        // stage 3
+        stage++;
+        byte range = stageRange[stage];
+        temp0[0] = HalfButterfly(cospi[32], temp1[0], cospi[32], temp1[1], cosBit);
+        temp0[1] = HalfButterfly(cospi[32], temp1[0], -cospi[32], temp1[1], cosBit);
+        temp0[2] = HalfButterfly(cospi[48], temp1[2], -cospi[16], temp1[3], cosBit);
+        temp0[3] = HalfButterfly(cospi[16], temp1[2], cospi[48], temp1[3], cosBit);
+        temp0[4] = ClampValue(temp1[4] + temp1[5], range);
+        temp0[5] = ClampValue(temp1[4] - temp1[5], range);
+        temp0[6] = ClampValue(temp1[7] - temp1[6], range);
+        temp0[7] = ClampValue(temp1[6] + temp1[7], range);
+
+        // range_check_buf(stage, input, bf1, size, stage_range[stage]);
+
+        // stage 4
+        stage++;
+        range = stageRange[stage];
+        temp1[0] = ClampValue(temp0[0] + temp0[3], range);
+        temp1[1] = ClampValue(temp0[1] + temp0[2], range);
+        temp1[2] = ClampValue(temp0[1] - temp0[2], range);
+        temp1[3] = ClampValue(temp0[0] - temp0[3], range);
+        temp1[4] = temp0[4];
+        temp1[5] = HalfButterfly(-cospi[32], temp0[5], cospi[32], temp0[6], cosBit);
+        temp1[6] = HalfButterfly(cospi[32], temp0[5], cospi[32], temp0[6], cosBit);
+        temp1[7] = temp0[7];
+
+        // range_check_buf(stage, input, bf1, size, stage_range[stage]);
+
+        // stage 5
+        stage++;
+        range = stageRange[stage];
+        Unsafe.Add(ref output, 0) = ClampValue(temp1[0] + temp1[7], range);
+        Unsafe.Add(ref output, 1) = ClampValue(temp1[1] + temp1[6], range);
+        Unsafe.Add(ref output, 2) = ClampValue(temp1[2] + temp1[5], range);
+        Unsafe.Add(ref output, 3) = ClampValue(temp1[3] + temp1[4], range);
+        Unsafe.Add(ref output, 4) = ClampValue(temp1[3] - temp1[4], range);
+        Unsafe.Add(ref output, 5) = ClampValue(temp1[2] - temp1[5], range);
+        Unsafe.Add(ref output, 6) = ClampValue(temp1[1] - temp1[6], range);
+        Unsafe.Add(ref output, 7) = ClampValue(temp1[0] - temp1[7], range);
+    }
+
+    /// <summary>
+    /// Clamps <paramref name="value"/> to the range of a signed integer that is <paramref name="bit"/> bits wide.
+    /// </summary>
+    /// <param name="value">The value to clamp.</param>
+    /// <param name="bit">The width in bits of the signed target range.</param>
+    /// <returns>The clamped value, or <paramref name="value"/> unchanged when no clamping applies.</returns>
+    internal static int ClampValue(int value, byte bit)
+    {
+        // Zero is an invalid clamp bit, and a width of 32 bits or more can never narrow an int.
+        // Returning early for wide values also keeps the shift count below from wrapping around.
+        if (bit is 0 or >= 32)
+        {
+            return value;
+        }
+
+        int maxValue = (1 << (bit - 1)) - 1;
+        int minValue = -(1 << (bit - 1));
+        return Av1Math.Clamp(value, minValue, maxValue);
+    }
+
+    /// <summary>
+    /// Calculates <c>(w0 * in0) + (w1 * in1)</c>, rounded and shifted right by <paramref name="bit"/> bits.
+    /// </summary>
+    /// <param name="w0">The weight applied to <paramref name="in0"/>.</param>
+    /// <param name="in0">The first input value.</param>
+    /// <param name="w1">The weight applied to <paramref name="in1"/>.</param>
+    /// <param name="in1">The second input value.</param>
+    /// <param name="bit">The number of bits to shift right after rounding.</param>
+    /// <returns>The rounded and shifted sum, truncated to 32 bits.</returns>
+    internal static int HalfButterfly(int w0, int in0, int w1, int in1, int bit)
+    {
+        // The products are formed in 32-bit arithmetic; the note below explains why wrapping is acceptable.
+        // 'unchecked' keeps that behavior regardless of the project's overflow-checking setting.
+        long result64 = unchecked((long)(w0 * in0) + (w1 * in1));
+        long intermediate = result64 + (1L << (bit - 1));
+
+        // NOTE(david.barker): The value 'result_64' may not necessarily fit
+        // into 32 bits. However, the result of this function is nominally
+        // ROUND_POWER_OF_TWO_64(result_64, bit)
+        // and that is required to fit into stage_range[stage] many bits
+        // (checked by range_check_buf()).
+        //
+        // Here we've unpacked that rounding operation, and it can be shown
+        // that the value of 'intermediate' here *does* fit into 32 bits
+        // for any conformant bitstream.
+        // The upshot is that, if you do all this calculation using
+        // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic,
+        // then you'll still get the correct result.
+        return (int)(intermediate >> bit);
+    }
+}
diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity16Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity16Inverse1dTransformer.cs index 14071f4d29..07c0510ada 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity16Inverse1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity16Inverse1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse; -internal class Av1Identity16Inverse1dTransformer : IAv1Forward1dTransformer +internal class Av1Identity16Inverse1dTransformer : IAv1Transformer1d { private const long Sqrt2Times2 = Av1Identity4Inverse1dTransformer.Sqrt2 >> 1; diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity32Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity32Inverse1dTransformer.cs index 22ed590ba5..15c7515da7 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity32Inverse1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity32Inverse1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse; -internal class Av1Identity32Inverse1dTransformer : IAv1Forward1dTransformer +internal class Av1Identity32Inverse1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity4Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity4Inverse1dTransformer.cs index 6547f16ae0..c99ca98d15 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity4Inverse1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity4Inverse1dTransformer.cs @@ -5,7 +5,7 @@ using 
System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse; -internal class Av1Identity4Inverse1dTransformer : IAv1Forward1dTransformer +internal class Av1Identity4Inverse1dTransformer : IAv1Transformer1d { internal const int Sqrt2Bits = 12; diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity64Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity64Inverse1dTransformer.cs index 7c75fa2a68..682decdc8d 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity64Inverse1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity64Inverse1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse; -internal class Av1Identity64Inverse1dTransformer : IAv1Forward1dTransformer +internal class Av1Identity64Inverse1dTransformer : IAv1Transformer1d { private const long Sqrt2Times4 = Av1Identity4Inverse1dTransformer.Sqrt2 >> 2; diff --git a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity8Inverse1dTransformer.cs b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity8Inverse1dTransformer.cs index 5528f03dd7..a311e32400 100644 --- a/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity8Inverse1dTransformer.cs +++ b/src/ImageSharp/Formats/Heif/Av1/Transform/Inverse/Av1Identity8Inverse1dTransformer.cs @@ -5,7 +5,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Inverse; -internal class Av1Identity8Inverse1dTransformer : IAv1Forward1dTransformer +internal class Av1Identity8Inverse1dTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) { diff --git a/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1EchoTestTransformer.cs b/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1EchoTestTransformer.cs index e47a77ad27..ed5859a3c9 100644 --- 
a/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1EchoTestTransformer.cs +++ b/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1EchoTestTransformer.cs @@ -5,7 +5,7 @@ using SixLabors.ImageSharp.Formats.Heif.Av1.Transform; namespace SixLabors.ImageSharp.Tests.Formats.Heif.Av1; -internal class Av1EchoTestTransformer : IAv1Forward1dTransformer +internal class Av1EchoTestTransformer : IAv1Transformer1d { public void Transform(Span input, Span output, int cosBit, Span stageRange) => input.CopyTo(output); diff --git a/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ForwardTransformTests.cs b/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ForwardTransformTests.cs index 4300b642b6..bfac3ee3e9 100644 --- a/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ForwardTransformTests.cs +++ b/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1ForwardTransformTests.cs @@ -85,7 +85,7 @@ public class Av1ForwardTransformTests Array.Fill(input, 1); int[] actual = new int[64 * 64]; Av1Transform2dFlipConfiguration config = new(transformType, transformSize); - IAv1Forward1dTransformer transformer = new Av1EchoTestTransformer(); + IAv1Transformer1d transformer = new Av1EchoTestTransformer(); int width = transformSize.GetWidth(); int height = transformSize.GetHeight(); int blockSize = width * height; @@ -123,7 +123,7 @@ public class Av1ForwardTransformTests Av1Transform2dFlipConfiguration config = new(Av1TransformType.Identity, Av1TransformSize.Size4x4); config.SetFlip(false, false); config.SetShift(0, 0, 0); - IAv1Forward1dTransformer transformer = new Av1EchoTestTransformer(); + IAv1Transformer1d transformer = new Av1EchoTestTransformer(); // Act Av1ForwardTransformer.Transform2d( @@ -157,7 +157,7 @@ public class Av1ForwardTransformTests Av1Transform2dFlipConfiguration config = new(Av1TransformType.Identity, Av1TransformSize.Size4x4); config.SetFlip(false, true); config.SetShift(0, 0, 0); - IAv1Forward1dTransformer transformer = new Av1EchoTestTransformer(); + IAv1Transformer1d transformer = new Av1EchoTestTransformer(); 
// Act Av1ForwardTransformer.Transform2d( @@ -191,7 +191,7 @@ public class Av1ForwardTransformTests Av1Transform2dFlipConfiguration config = new(Av1TransformType.Identity, Av1TransformSize.Size4x4); config.SetFlip(true, false); config.SetShift(0, 0, 0); - IAv1Forward1dTransformer transformer = new Av1EchoTestTransformer(); + IAv1Transformer1d transformer = new Av1EchoTestTransformer(); // Act Av1ForwardTransformer.Transform2d( @@ -225,7 +225,7 @@ public class Av1ForwardTransformTests Av1Transform2dFlipConfiguration config = new(Av1TransformType.Identity, Av1TransformSize.Size4x4); config.SetFlip(true, true); config.SetShift(0, 0, 0); - IAv1Forward1dTransformer transformer = new Av1EchoTestTransformer(); + IAv1Transformer1d transformer = new Av1EchoTestTransformer(); // Act Av1ForwardTransformer.Transform2d( @@ -261,7 +261,7 @@ public class Av1ForwardTransformTests Av1Transform2dFlipConfiguration config = new(Av1TransformType.Identity, Av1TransformSize.Size8x4); config.SetFlip(true, false); config.SetShift(0, 0, 0); - IAv1Forward1dTransformer transformer = new Av1EchoTestTransformer(); + IAv1Transformer1d transformer = new Av1EchoTestTransformer(); // Act Av1ForwardTransformer.Transform2d( @@ -305,7 +305,7 @@ public class Av1ForwardTransformTests Av1Transform2dFlipConfiguration config = new(Av1TransformType.Identity, Av1TransformSize.Size4x8); config.SetFlip(true, false); config.SetShift(0, 0, 0); - IAv1Forward1dTransformer transformer = new Av1EchoTestTransformer(); + IAv1Transformer1d transformer = new Av1EchoTestTransformer(); // Act Av1ForwardTransformer.Transform2d( @@ -560,7 +560,7 @@ public class Av1ForwardTransformTests private static void AssertAccuracy1d( Av1TransformSize transformSize, Av1TransformType transformType, - IAv1Forward1dTransformer transformerUnderTest, + IAv1Transformer1d transformerUnderTest, int allowedError = 1) { Random rnd = new(0); diff --git a/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1InverseTransformTests.cs 
b/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1InverseTransformTests.cs index ce409789fa..a327c69110 100644 --- a/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1InverseTransformTests.cs +++ b/tests/ImageSharp.Tests/Formats/Heif/Av1/Av1InverseTransformTests.cs @@ -20,37 +20,37 @@ public class Av1InverseTransformTests public void AccuracyOfDct1dTransformSize4Test() => AssertAccuracy1d(Av1TransformType.DctDct, Av1TransformSize.Size4x4, 1); - // [Fact] + [Fact] public void AccuracyOfDct1dTransformSize8Test() - => AssertAccuracy1d(Av1TransformType.DctDct, Av1TransformSize.Size8x8, 1, 2); + => AssertAccuracy1d(Av1TransformType.DctDct, Av1TransformSize.Size8x8, 2, 2); - // [Fact] + [Fact] public void AccuracyOfDct1dTransformSize16Test() - => AssertAccuracy1d(Av1TransformType.DctDct, Av1TransformSize.Size16x16, 1, 3); + => AssertAccuracy1d(Av1TransformType.DctDct, Av1TransformSize.Size16x16, 3, 3); - // [Fact] + [Fact] public void AccuracyOfDct1dTransformSize32Test() - => AssertAccuracy1d(Av1TransformType.DctDct, Av1TransformSize.Size32x32, 1, 4); + => AssertAccuracy1d(Av1TransformType.DctDct, Av1TransformSize.Size32x32, 4, 4); - // [Fact] + [Fact] public void AccuracyOfDct1dTransformSize64Test() - => AssertAccuracy1d(Av1TransformType.DctDct, Av1TransformSize.Size64x64, 1, 5); + => AssertAccuracy1d(Av1TransformType.DctDct, Av1TransformSize.Size64x64, 5, 5); [Fact] public void AccuracyOfAdst1dTransformSize4Test() => AssertAccuracy1d(Av1TransformType.AdstAdst, Av1TransformSize.Size4x4, 1); - // [Fact] + [Fact] public void AccuracyOfAdst1dTransformSize8Test() - => AssertAccuracy1d(Av1TransformType.AdstAdst, Av1TransformSize.Size8x8, 1, 2); + => AssertAccuracy1d(Av1TransformType.AdstAdst, Av1TransformSize.Size8x8, 2, 2); - // [Fact] + [Fact] public void AccuracyOfAdst1dTransformSize16Test() - => AssertAccuracy1d(Av1TransformType.AdstAdst, Av1TransformSize.Size16x16, 1, 3); + => AssertAccuracy1d(Av1TransformType.AdstAdst, Av1TransformSize.Size16x16, 3, 3); - // [Fact] + [Fact] public 
void AccuracyOfAdst1dTransformSize32Test() - => AssertAccuracy1d(Av1TransformType.AdstAdst, Av1TransformSize.Size32x32, 1, 3); + => AssertAccuracy1d(Av1TransformType.AdstAdst, Av1TransformSize.Size32x32, 4, 3); [Fact] public void AccuracyOfIdentity1dTransformSize4Test() @@ -70,7 +70,7 @@ public class Av1InverseTransformTests [Fact] public void AccuracyOfIdentity1dTransformSize64Test() - => AssertAccuracy1d(Av1TransformType.Identity, Av1TransformSize.Size64x64, 4); + => AssertAccuracy1d(Av1TransformType.Identity, Av1TransformSize.Size64x64, 1); [Fact] public void AccuracyOfEchoTransformSize4Test() @@ -96,7 +96,7 @@ public class Av1InverseTransformTests config.GenerateStageRange(8); config.SetFlip(false, false); config.SetShift(0, 0, 0); - IAv1Forward1dTransformer transformer = new Av1EchoTestTransformer(); + IAv1Transformer1d transformer = new Av1EchoTestTransformer(); // Act Av1Inverse2dTransformer.Transform2dAdd( @@ -132,7 +132,7 @@ public class Av1InverseTransformTests Av1Transform2dFlipConfiguration config = new(Av1TransformType.Identity, Av1TransformSize.Size4x4); config.SetFlip(false, true); config.SetShift(0, 0, 0); - IAv1Forward1dTransformer transformer = new Av1EchoTestTransformer(); + IAv1Transformer1d transformer = new Av1EchoTestTransformer(); // Act Av1Inverse2dTransformer.Transform2dAdd( @@ -168,7 +168,7 @@ public class Av1InverseTransformTests Av1Transform2dFlipConfiguration config = new(Av1TransformType.Identity, Av1TransformSize.Size4x4); config.SetFlip(true, false); config.SetShift(0, 0, 0); - IAv1Forward1dTransformer transformer = new Av1EchoTestTransformer(); + IAv1Transformer1d transformer = new Av1EchoTestTransformer(); // Act Av1Inverse2dTransformer.Transform2dAdd( @@ -204,7 +204,7 @@ public class Av1InverseTransformTests Av1Transform2dFlipConfiguration config = new(Av1TransformType.Identity, Av1TransformSize.Size4x4); config.SetFlip(true, true); config.SetShift(0, 0, 0); - IAv1Forward1dTransformer transformer = new Av1EchoTestTransformer(); + 
IAv1Transformer1d transformer = new Av1EchoTestTransformer(); // Act Av1Inverse2dTransformer.Transform2dAdd( @@ -228,8 +228,8 @@ public class Av1InverseTransformTests int allowedError = 1) { Av1Transform2dFlipConfiguration config = new(transformType, transformSize); - IAv1Forward1dTransformer forward = GetForwardTransformer(config.TransformFunctionTypeColumn); - IAv1Forward1dTransformer inverse = GetInverseTransformer(config.TransformFunctionTypeColumn); + IAv1Transformer1d forward = GetForwardTransformer(config.TransformFunctionTypeColumn); + IAv1Transformer1d inverse = GetInverseTransformer(config.TransformFunctionTypeColumn); AssertAccuracy1d(transformType, transformSize, scaleLog2, forward, inverse, allowedError); } @@ -237,8 +237,8 @@ public class Av1InverseTransformTests Av1TransformType transformType, Av1TransformSize transformSize, int scaleLog2, - IAv1Forward1dTransformer forwardTransformer, - IAv1Forward1dTransformer inverseTransformer, + IAv1Transformer1d forwardTransformer, + IAv1Transformer1d inverseTransformer, int allowedError = 1) { const int bitDepth = 10; @@ -430,7 +430,7 @@ public class Av1InverseTransformTests private static bool IsTransformTypeImplemented(Av1TransformType transformType, Av1TransformSize transformSize) => transformSize == Av1TransformSize.Size4x4; - private static IAv1Forward1dTransformer GetForwardTransformer(Av1TransformFunctionType func) => + private static IAv1Transformer1d GetForwardTransformer(Av1TransformFunctionType func) => func switch { Av1TransformFunctionType.Dct4 => new Av1Dct4Forward1dTransformer(), @@ -451,18 +451,18 @@ public class Av1InverseTransformTests _ => null, }; - private static IAv1Forward1dTransformer GetInverseTransformer(Av1TransformFunctionType func) => + private static IAv1Transformer1d GetInverseTransformer(Av1TransformFunctionType func) => func switch { Av1TransformFunctionType.Dct4 => new Av1Dct4Inverse1dTransformer(), - Av1TransformFunctionType.Dct8 => null, // new Av1Dct8Inverse1dTransformer(), 
- Av1TransformFunctionType.Dct16 => null, // new Av1Dct16Inverse1dTransformer(), - Av1TransformFunctionType.Dct32 => null, // new Av1Dct32Inverse1dTransformer(), - Av1TransformFunctionType.Dct64 => null, // new Av1Dct64Inverse1dTransformer(), + Av1TransformFunctionType.Dct8 => new Av1Dct8Inverse1dTransformer(), + Av1TransformFunctionType.Dct16 => new Av1Dct16Inverse1dTransformer(), + Av1TransformFunctionType.Dct32 => new Av1Dct32Inverse1dTransformer(), + Av1TransformFunctionType.Dct64 => new Av1Dct64Inverse1dTransformer(), Av1TransformFunctionType.Adst4 => new Av1Adst4Inverse1dTransformer(), - Av1TransformFunctionType.Adst8 => null, // new Av1Adst8Inverse1dTransformer(), - Av1TransformFunctionType.Adst16 => null, // new Av1Adst16Inverse1dTransformer(), - Av1TransformFunctionType.Adst32 => null, // new Av1Adst32Inverse1dTransformer(), + Av1TransformFunctionType.Adst8 => new Av1Adst8Inverse1dTransformer(), + Av1TransformFunctionType.Adst16 => new Av1Adst16Inverse1dTransformer(), + Av1TransformFunctionType.Adst32 => new Av1Adst32Inverse1dTransformer(), Av1TransformFunctionType.Identity4 => new Av1Identity4Inverse1dTransformer(), Av1TransformFunctionType.Identity8 => new Av1Identity8Inverse1dTransformer(), Av1TransformFunctionType.Identity16 => new Av1Identity16Inverse1dTransformer(), @@ -483,8 +483,7 @@ public class Av1InverseTransformTests private static int GetMaximumError(Span expected, Span actual) { int maximumErrorInTest = 0; - int count = Math.Min(expected.Length, 32); - for (int ni = 0; ni < count; ++ni) + for (int ni = 0; ni < expected.Length; ++ni) { maximumErrorInTest = Math.Max(maximumErrorInTest, Math.Abs(Convert.ToInt32(actual[ni], CultureInfo.InvariantCulture) - Convert.ToInt32(expected[ni], CultureInfo.InvariantCulture))); }