mirror of https://github.com/SixLabors/ImageSharp
26 changed files with 453 additions and 315 deletions
@ -1,15 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
internal class Av1Identity16ForwardTransformer : IAv1ForwardTransformer |
|||
internal class Av1Adst16Forward1dTransformer : IAv1Forward1dTransformer |
|||
{ |
|||
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange) |
|||
=> throw new NotImplementedException(); |
|||
|
|||
public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber) |
|||
=> throw new NotImplementedException(); |
|||
} |
|||
@ -1,15 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Stub implementation of <see cref="IAv1ForwardTransformer"/>; neither entry point
/// is implemented yet, so every call throws <see cref="NotImplementedException"/>.
/// </summary>
internal class Av1Adst16ForwardTransformer : IAv1ForwardTransformer
{
    /// <inheritdoc/>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <inheritdoc/>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}
|||
@ -1,15 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
internal class Av1Identity32ForwardTransformer : IAv1ForwardTransformer |
|||
internal class Av1Adst32Forward1dTransformer : IAv1Forward1dTransformer |
|||
{ |
|||
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange) |
|||
=> throw new NotImplementedException(); |
|||
|
|||
public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber) |
|||
=> throw new NotImplementedException(); |
|||
} |
|||
@ -1,15 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Stub implementation of <see cref="IAv1ForwardTransformer"/>; neither entry point
/// is implemented yet, so every call throws <see cref="NotImplementedException"/>.
/// </summary>
internal class Av1Adst32ForwardTransformer : IAv1ForwardTransformer
{
    /// <inheritdoc/>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <inheritdoc/>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}
|||
@ -1,15 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
internal class Av1Identity4ForwardTransformer : IAv1ForwardTransformer |
|||
internal class Av1Adst4Forward1dTransformer : IAv1Forward1dTransformer |
|||
{ |
|||
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange) |
|||
=> throw new NotImplementedException(); |
|||
|
|||
public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber) |
|||
=> throw new NotImplementedException(); |
|||
} |
|||
@ -1,15 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
internal class Av1Identity8ForwardTransformer : IAv1ForwardTransformer |
|||
internal class Av1Adst8Forward1dTransformer : IAv1Forward1dTransformer |
|||
{ |
|||
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange) |
|||
=> throw new NotImplementedException(); |
|||
|
|||
public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber) |
|||
=> throw new NotImplementedException(); |
|||
} |
|||
@ -1,15 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
internal class Av1Dct8ForwardTransformer : IAv1ForwardTransformer |
|||
internal class Av1Dct16Forward1dTransformer : IAv1Forward1dTransformer |
|||
{ |
|||
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange) |
|||
=> throw new NotImplementedException(); |
|||
|
|||
public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber) |
|||
=> throw new NotImplementedException(); |
|||
} |
|||
@ -1,15 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Stub implementation of <see cref="IAv1ForwardTransformer"/>; neither entry point
/// is implemented yet, so every call throws <see cref="NotImplementedException"/>.
/// </summary>
internal class Av1Dct16ForwardTransformer : IAv1ForwardTransformer
{
    /// <inheritdoc/>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <inheritdoc/>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}
|||
@ -1,15 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
internal class Av1Adst8ForwardTransformer : IAv1ForwardTransformer |
|||
internal class Av1Dct32Forward1dTransformer : IAv1Forward1dTransformer |
|||
{ |
|||
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange) |
|||
=> throw new NotImplementedException(); |
|||
|
|||
public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber) |
|||
=> throw new NotImplementedException(); |
|||
} |
|||
@ -0,0 +1,67 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.CompilerServices; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Scalar forward 4-point DCT operating on four consecutive <see cref="int"/> values.
/// </summary>
internal class Av1Dct4Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Transforms the four values starting at <paramref name="input"/> and writes the
    /// four results starting at <paramref name="output"/>.
    /// </summary>
    /// <param name="input">Reference to the first of four input values.</param>
    /// <param name="output">
    /// Reference to the first of four output slots. The slots are also used as scratch
    /// space for the first butterfly stage.
    /// </param>
    /// <param name="cosBit">Selects the coefficient table and is the rounding shift amount.</param>
    /// <param name="stageRange">Not used by this implementation.</param>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
        => TransformScalar(ref input, ref output, cosBit);

    /// <summary>
    /// Runs the three butterfly stages of the transform without any heap allocation.
    /// </summary>
    private static void TransformScalar(ref int input, ref int output, int cosBit)
    {
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        ref int output1 = ref Unsafe.Add(ref output, 1);
        ref int output2 = ref Unsafe.Add(ref output, 2);
        ref int output3 = ref Unsafe.Add(ref output, 3);

        // stage 0: nothing to do.

        // stage 1: sums and differences of mirrored inputs, stored in the output slots.
        output = input + Unsafe.Add(ref input, 3);
        output1 = Unsafe.Add(ref input, 1) + Unsafe.Add(ref input, 2);
        output2 = -Unsafe.Add(ref input, 2) + Unsafe.Add(ref input, 1);
        output3 = -Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 0);

        // stage 2: all four results are computed into locals before anything is
        // written back, because each one reads two of the stage 1 values.
        int step0 = HalfBtf(cospi[32], output, cospi[32], output1, cosBit);
        int step1 = HalfBtf(-cospi[32], output1, cospi[32], output, cosBit);
        int step2 = HalfBtf(cospi[48], output2, cospi[16], output3, cosBit);
        int step3 = HalfBtf(cospi[48], output3, -cospi[16], output2, cosBit);

        // stage 3: reorder (step1 and step2 swap places).
        output = step0;
        output1 = step2;
        output2 = step1;
        output3 = step3;
    }

    /// <summary>
    /// Computes <c>(w0 * in0 + w1 * in1 + 2^(bit - 1)) >> bit</c>, where each product is
    /// formed in 32-bit arithmetic before being widened to 64 bits.
    /// </summary>
    private static int HalfBtf(int w0, int in0, int w1, int in1, int bit)
    {
        long result64 = (long)(w0 * in0) + (w1 * in1);
        long intermediate = result64 + (1L << (bit - 1));

        // NOTE(david.barker): The value 'result_64' may not necessarily fit
        // into 32 bits. However, the result of this function is nominally
        // ROUND_POWER_OF_TWO_64(result_64, bit)
        // and that is required to fit into stage_range[stage] many bits
        // (checked by range_check_buf()).
        //
        // Here we've unpacked that rounding operation, and it can be shown
        // that the value of 'intermediate' here *does* fit into 32 bits
        // for any conformant bitstream.
        // The upshot is that, if you do all this calculation using
        // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic,
        // then you'll still get the correct result.
        return (int)(intermediate >> bit);
    }
}
|||
@ -1,138 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Forward 4-point DCT with a scalar path and a 128-bit SSE path.
/// </summary>
internal class Av1Dct4ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Transforms the four values starting at <paramref name="input"/> and writes the
    /// four results starting at <paramref name="output"/>.
    /// </summary>
    /// <param name="input">Reference to the first of four input values.</param>
    /// <param name="output">
    /// Reference to the first of four output slots. The slots are also used as scratch
    /// space for the first butterfly stage.
    /// </param>
    /// <param name="cosBit">Selects the coefficient table and is the rounding shift amount.</param>
    /// <param name="stageRange">Not used by this implementation.</param>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        ref int output1 = ref Unsafe.Add(ref output, 1);
        ref int output2 = ref Unsafe.Add(ref output, 2);
        ref int output3 = ref Unsafe.Add(ref output, 3);

        // stage 0: nothing to do.

        // stage 1: sums and differences of mirrored inputs, stored in the output slots.
        output = input + Unsafe.Add(ref input, 3);
        output1 = Unsafe.Add(ref input, 1) + Unsafe.Add(ref input, 2);
        output2 = -Unsafe.Add(ref input, 2) + Unsafe.Add(ref input, 1);
        output3 = -Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 0);

        // stage 2: all four results are computed into locals before anything is
        // written back, because each one reads two of the stage 1 values.
        int step0 = HalfBtf(cospi[32], output, cospi[32], output1, cosBit);
        int step1 = HalfBtf(-cospi[32], output1, cospi[32], output, cosBit);
        int step2 = HalfBtf(cospi[48], output2, cospi[16], output3, cosBit);
        int step3 = HalfBtf(cospi[48], output3, -cospi[16], output2, cosBit);

        // stage 3: reorder (step1 and step2 swap places).
        output = step0;
        output1 = step2;
        output2 = step1;
        output3 = step3;
    }

    /// <summary>
    /// Always throws: a 4-wide block is too small for the 256-bit path, use
    /// <see cref="TransformSse"/> instead.
    /// </summary>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
        => throw new NotImplementedException("Too small block for Vector implementation, use TransformSse() method instead.");

    /// <summary>
    /// SVT: fdct4x4_sse4_1
    /// </summary>
    /// <param name="input">First input vector; further rows are read at multiples of <paramref name="columnNumber"/>.</param>
    /// <param name="output">First of four consecutive output vectors; receives the transposed result.</param>
    /// <param name="cosBit">Selects the coefficient table and is the rounding shift amount.</param>
    /// <param name="columnNumber">Distance, in vectors, between consecutive input rows.</param>
    public static void TransformSse(ref Vector128<int> input, ref Vector128<int> output, byte cosBit, int columnNumber)
    {
#pragma warning disable CA1857 // A constant is expected for the parameter

        // We only use stage-2 bit;
        // shift[0] is used in load_buffer_4x4()
        // shift[1] is used in txfm_func_col()
        // shift[2] is used in txfm_func_row()
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        Vector128<int> cospi32 = Vector128.Create<int>(cospi[32]);
        Vector128<int> cospi48 = Vector128.Create<int>(cospi[48]);
        Vector128<int> cospi16 = Vector128.Create<int>(cospi[16]);
        Vector128<int> rnding = Vector128.Create<int>(1 << (cosBit - 1));
        Vector128<int> s0, s1, s2, s3;
        Vector128<int> u0, u1, u2, u3;
        Vector128<int> v0, v1, v2, v3;

        int endidx = 3 * columnNumber;
        s0 = Sse2.Add(input, Unsafe.Add(ref input, endidx));
        s3 = Sse2.Subtract(input, Unsafe.Add(ref input, endidx));
        endidx -= columnNumber;
        s1 = Sse2.Add(Unsafe.Add(ref input, columnNumber), Unsafe.Add(ref input, endidx));
        s2 = Sse2.Subtract(Unsafe.Add(ref input, columnNumber), Unsafe.Add(ref input, endidx));

        // btf_32_sse4_1_type0(cospi32, cospi32, s[01], u[02], bit);
        u0 = Sse41.MultiplyLow(s0, cospi32);
        u1 = Sse41.MultiplyLow(s1, cospi32);
        u2 = Sse2.Add(u0, u1);
        v0 = Sse2.Subtract(u0, u1);

        u3 = Sse2.Add(u2, rnding);
        v1 = Sse2.Add(v0, rnding);

        u0 = Sse2.ShiftRightArithmetic(u3, cosBit);
        u2 = Sse2.ShiftRightArithmetic(v1, cosBit);

        // btf_32_sse4_1_type1(cospi48, cospi16, s[23], u[13], bit);
        v0 = Sse41.MultiplyLow(s2, cospi48);
        v1 = Sse41.MultiplyLow(s3, cospi16);
        v2 = Sse2.Add(v0, v1);

        v3 = Sse2.Add(v2, rnding);
        u1 = Sse2.ShiftRightArithmetic(v3, cosBit);

        v0 = Sse41.MultiplyLow(s2, cospi16);
        v1 = Sse41.MultiplyLow(s3, cospi48);
        v2 = Sse2.Subtract(v1, v0);

        v3 = Sse2.Add(v2, rnding);
        u3 = Sse2.ShiftRightArithmetic(v3, cosBit);

        // Note: shift[1] and shift[2] are zeros

        // Transpose 4x4 32-bit
        v0 = Sse2.UnpackLow(u0, u1);
        v1 = Sse2.UnpackHigh(u0, u1);
        v2 = Sse2.UnpackLow(u2, u3);
        v3 = Sse2.UnpackHigh(u2, u3);

        output = Sse2.UnpackLow(v0.AsInt64(), v2.AsInt64()).AsInt32();
        Unsafe.Add(ref output, 1) = Sse2.UnpackHigh(v0.AsInt64(), v2.AsInt64()).AsInt32();
        Unsafe.Add(ref output, 2) = Sse2.UnpackLow(v1.AsInt64(), v3.AsInt64()).AsInt32();
        Unsafe.Add(ref output, 3) = Sse2.UnpackHigh(v1.AsInt64(), v3.AsInt64()).AsInt32();
#pragma warning restore CA1857 // A constant is expected for the parameter
    }

    /// <summary>
    /// Computes <c>(w0 * in0 + w1 * in1 + 2^(bit - 1)) >> bit</c>, where each product is
    /// formed in 32-bit arithmetic before being widened to 64 bits.
    /// </summary>
    private static int HalfBtf(int w0, int in0, int w1, int in1, int bit)
    {
        long result64 = (long)(w0 * in0) + (w1 * in1);
        long intermediate = result64 + (1L << (bit - 1));

        // NOTE(david.barker): The value 'result_64' may not necessarily fit
        // into 32 bits. However, the result of this function is nominally
        // ROUND_POWER_OF_TWO_64(result_64, bit)
        // and that is required to fit into stage_range[stage] many bits
        // (checked by range_check_buf()).
        //
        // Here we've unpacked that rounding operation, and it can be shown
        // that the value of 'intermediate' here *does* fit into 32 bits
        // for any conformant bitstream.
        // The upshot is that, if you do all this calculation using
        // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic,
        // then you'll still get the correct result.
        return (int)(intermediate >> bit);
    }
}
|||
@ -0,0 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Stub implementation of <see cref="IAv1Forward1dTransformer"/>; the transform is not
/// implemented yet, so every call throws <see cref="NotImplementedException"/>.
/// </summary>
internal class Av1Dct64Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <inheritdoc/>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
|||
@ -1,15 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Stub implementation of <see cref="IAv1ForwardTransformer"/>; neither entry point
/// is implemented yet, so every call throws <see cref="NotImplementedException"/>.
/// </summary>
internal class Av1Dct64ForwardTransformer : IAv1ForwardTransformer
{
    /// <inheritdoc/>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <inheritdoc/>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}
|||
@ -1,15 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
internal class Av1Dct32ForwardTransformer : IAv1ForwardTransformer |
|||
internal class Av1Dct8Forward1dTransformer : IAv1Forward1dTransformer |
|||
{ |
|||
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange) |
|||
=> throw new NotImplementedException(); |
|||
|
|||
public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber) |
|||
=> throw new NotImplementedException(); |
|||
} |
|||
@ -0,0 +1,100 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Forward 2D transform for 4x4 blocks that applies the 4-point DCT to both columns and rows.
/// </summary>
internal class Av1DctDct4Forward2dTransformer : Av1Forward2dTransformerBase
{
    private readonly Av1Transform2dFlipConfiguration config = new(Av1TransformType.DctDct, Av1TransformSize.Size4x4);
    private readonly Av1Dct4Forward1dTransformer transformer = new();

    // Scratch buffer handed to Transform2dCore as the intermediate column-pass result.
    private readonly int[] temp = new int[Av1Constants.MaxTransformSize * Av1Constants.MaxTransformSize];

    /// <summary>
    /// Transforms a 4x4 block of samples into coefficients using the scalar 2D core.
    /// </summary>
    /// <param name="input">Reference to the first input sample; rows are read with a stride of 4.</param>
    /// <param name="output">Reference to the first output coefficient.</param>
    /// <param name="cosBit">Currently unused; the cos bits are taken from the configuration.</param>
    /// <param name="columnNumber">Currently unused by the scalar path.</param>
    public void Transform(ref short input, ref int output, int cosBit, int columnNumber)
    {
        // TODO: the vectorized path (TransformVector) is not wired up yet, it still needs
        // the input samples loaded into Vector128<int> rows before it can be called.
        // Until then the scalar core is used unconditionally, with a bit depth of 8.
        Transform2dCore(this.transformer, this.transformer, ref input, 4, ref output, this.config, ref this.temp[0], 8);
    }

    /// <summary>
    /// SVT: fdct4x4_sse4_1
    /// </summary>
    /// <param name="input">First input vector; further rows are read at multiples of <paramref name="columnNumber"/>.</param>
    /// <param name="output">First of four consecutive output vectors; receives the transposed result.</param>
    /// <param name="cosBit">Selects the coefficient table and is the rounding shift amount.</param>
    /// <param name="columnNumber">Distance, in vectors, between consecutive input rows.</param>
    private static void TransformVector(ref Vector128<int> input, ref Vector128<int> output, int cosBit, int columnNumber)
    {
        // We only use stage-2 bit;
        // shift[0] is used in load_buffer_4x4()
        // shift[1] is used in txfm_func_col()
        // shift[2] is used in txfm_func_row()
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        Vector128<int> cospi32 = Vector128.Create<int>(cospi[32]);
        Vector128<int> cospi48 = Vector128.Create<int>(cospi[48]);
        Vector128<int> cospi16 = Vector128.Create<int>(cospi[16]);
        Vector128<int> rnding = Vector128.Create<int>(1 << (cosBit - 1));
        Vector128<int> s0, s1, s2, s3;
        Vector128<int> u0, u1, u2, u3;
        Vector128<int> v0, v1, v2, v3;

        // Interleaves the 32-bit elements of the lower and upper 128-bit halves.
        Vector256<int> interleave32 = Vector256.Create(0, 4, 1, 5, 2, 6, 3, 7);

        // Swaps the two 64-bit pairs inside each 128-bit half.
        Vector256<int> swap64 = Vector256.Create(2, 3, 0, 1, 6, 7, 4, 5);

        // Selects the low 64 bits of each 128-bit half from the first operand.
        Vector256<int> selectLow64 = Vector256.Create(-1, -1, 0, 0, -1, -1, 0, 0);

        int endidx = 3 * columnNumber;
        s0 = Vector128.Add(input, Unsafe.Add(ref input, endidx));
        s3 = Vector128.Subtract(input, Unsafe.Add(ref input, endidx));
        endidx -= columnNumber;
        s1 = Vector128.Add(Unsafe.Add(ref input, columnNumber), Unsafe.Add(ref input, endidx));
        s2 = Vector128.Subtract(Unsafe.Add(ref input, columnNumber), Unsafe.Add(ref input, endidx));

        // btf_32_sse4_1_type0(cospi32, cospi32, s[01], u[02], bit);
        u0 = Vector128.Multiply(s0, cospi32);
        u1 = Vector128.Multiply(s1, cospi32);
        u2 = Vector128.Add(u0, u1);
        v0 = Vector128.Subtract(u0, u1);

        u3 = Vector128.Add(u2, rnding);
        v1 = Vector128.Add(v0, rnding);

        u0 = Vector128.ShiftRightArithmetic(u3, cosBit);
        u2 = Vector128.ShiftRightArithmetic(v1, cosBit);

        // btf_32_sse4_1_type1(cospi48, cospi16, s[23], u[13], bit);
        v0 = Vector128.Multiply(s2, cospi48);
        v1 = Vector128.Multiply(s3, cospi16);
        v2 = Vector128.Add(v0, v1);

        v3 = Vector128.Add(v2, rnding);
        u1 = Vector128.ShiftRightArithmetic(v3, cosBit);

        v0 = Vector128.Multiply(s2, cospi16);
        v1 = Vector128.Multiply(s3, cospi48);
        v2 = Vector128.Subtract(v1, v0);

        v3 = Vector128.Add(v2, rnding);
        u3 = Vector128.ShiftRightArithmetic(v3, cosBit);

        // Note: shift[1] and shift[2] are zeros

        // Transpose 4x4 32-bit.
        // w0 = [u0_0 u1_0 u0_1 u1_1 | u0_2 u1_2 u0_3 u1_3]
        // w1 = [u2_0 u3_0 u2_1 u3_1 | u2_2 u3_2 u2_3 u3_3]
        Vector256<int> w0 = Vector256.Shuffle(Vector256.Create(u0, u1), interleave32);
        Vector256<int> w1 = Vector256.Shuffle(Vector256.Create(u2, u3), interleave32);

        // rows02 = [u0_0 u1_0 u2_0 u3_0 | u0_2 u1_2 u2_2 u3_2]
        // rows13 = [u0_1 u1_1 u2_1 u3_1 | u0_3 u1_3 u2_3 u3_3]
        Vector256<int> rows02 = Vector256.ConditionalSelect(selectLow64, w0, Vector256.Shuffle(w1, swap64));
        Vector256<int> rows13 = Vector256.ConditionalSelect(selectLow64, Vector256.Shuffle(w0, swap64), w1);

        output = Vector256.GetLower(rows02);
        Unsafe.Add(ref output, 1) = Vector256.GetLower(rows13);
        Unsafe.Add(ref output, 2) = Vector256.GetUpper(rows02);
        Unsafe.Add(ref output, 3) = Vector256.GetUpper(rows13);
    }
}
|||
@ -0,0 +1,186 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.CompilerServices; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Shared scalar core for forward 2D transforms: a column pass followed by a row pass,
/// each delegated to an <see cref="IAv1Forward1dTransformer"/>.
/// </summary>
internal abstract class Av1Forward2dTransformerBase
{
    // Fixed-point scale applied to rectangular blocks with a 2:1 aspect ratio:
    // 5793 / 2^12 is approximately the square root of 2.
    private const int NewSqrt = 5793;
    private const int NewSqrtBitCount = 12;

    /// <summary>
    /// SVT: av1_tranform_two_d_core_c
    /// </summary>
    /// <param name="transformFunctionColumn">1D transform applied to every column.</param>
    /// <param name="transformFunctionRow">1D transform applied to every row.</param>
    /// <param name="input">Reference to the first input sample.</param>
    /// <param name="inputStride">Distance, in samples, between consecutive input rows.</param>
    /// <param name="output">
    /// Reference to the first output coefficient. During the column pass the start of this
    /// buffer is also used as scratch for one input column and one output column.
    /// </param>
    /// <param name="config">Supplies the block size, flip flags, shifts and cos bits.</param>
    /// <param name="buf">Intermediate buffer receiving the column pass result in row-major order.</param>
    /// <param name="bitDepth">Bit depth forwarded to <c>config.GenerateStageRange</c>.</param>
    protected static void Transform2dCore<TColumn, TRow>(TColumn transformFunctionColumn, TRow transformFunctionRow, ref short input, uint inputStride, ref int output, Av1Transform2dFlipConfiguration config, ref int buf, int bitDepth)
        where TColumn : IAv1Forward1dTransformer
        where TRow : IAv1Forward1dTransformer
    {
        // Note when assigning txfm_size_col, we use the txfm_size from the
        // row configuration and vice versa. This is intentionally done to
        // accurately perform rectangular transforms. When the transform is
        // rectangular, the number of columns will be the same as the
        // txfm_size stored in the row cfg struct. It will make no difference
        // for square transforms.
        int transformColumnCount = config.TransformSize.GetWidth();
        int transformRowCount = config.TransformSize.GetHeight();

        // Take the shift from the larger dimension in the rectangular case.
        Span<int> shift = config.Shift;
        int rectangleType = GetRectangularRatio(transformColumnCount, transformRowCount);
        bool scaleBySqrt2 = Math.Abs(rectangleType) == 1;

        // NOTE(review): nothing visible in this method writes to these two spans before
        // they are handed to the 1D transformers - confirm whether that is intended.
        Span<byte> stageRangeColumn = stackalloc byte[Av1Transform2dFlipConfiguration.MaxStageNumber];
        Span<byte> stageRangeRow = stackalloc byte[Av1Transform2dFlipConfiguration.MaxStageNumber];

        // assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
        // assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
        config.GenerateStageRange(bitDepth);

        int cosBitColumn = config.CosBitColumn;
        int cosBitRow = config.CosBitRow;

        // ASSERT(txfm_func_col != NULL);
        // ASSERT(txfm_func_row != NULL);
        // use output buffer as temp buffer
        ref int tempIn = ref output;
        ref int tempOut = ref Unsafe.Add(ref output, transformRowCount);

        // Columns
        for (int c = 0; c < transformColumnCount; ++c)
        {
            // Gather one input column into the scratch area, optionally bottom-up.
            if (config.FlipUpsideDown)
            {
                uint t = (uint)(c + ((transformRowCount - 1) * (int)inputStride));
                for (int r = 0; r < transformRowCount; ++r)
                {
                    // flip upside down
                    Unsafe.Add(ref tempIn, r) = Unsafe.Add(ref input, t);
                    t -= inputStride;
                }
            }
            else
            {
                uint t = (uint)c;
                for (int r = 0; r < transformRowCount; ++r)
                {
                    Unsafe.Add(ref tempIn, r) = Unsafe.Add(ref input, t);
                    t += inputStride;
                }
            }

            RoundShiftArray(ref tempIn, transformRowCount, -shift[0]); // NM svt_av1_round_shift_array_c
            transformFunctionColumn.Transform(ref tempIn, ref tempOut, cosBitColumn, stageRangeColumn);
            RoundShiftArray(ref tempOut, transformRowCount, -shift[1]); // NM svt_av1_round_shift_array_c

            // Scatter the transformed column into the intermediate buffer; a left-to-right
            // flip only changes which column of the buffer is written.
            int target = config.FlipLeftToRight ? transformColumnCount - c - 1 : c;
            for (int r = 0; r < transformRowCount; ++r)
            {
                Unsafe.Add(ref buf, target) = Unsafe.Add(ref tempOut, r);
                target += transformColumnCount;
            }
        }

        // Rows
        for (int r = 0; r < transformRowCount; ++r)
        {
            int rowOffset = r * transformColumnCount;
            ref int rowOutput = ref Unsafe.Add(ref output, rowOffset);

            transformFunctionRow.Transform(ref Unsafe.Add(ref buf, rowOffset), ref rowOutput, cosBitRow, stageRangeRow);
            RoundShiftArray(ref rowOutput, transformColumnCount, -shift[2]);

            if (scaleBySqrt2)
            {
                // Multiply everything by Sqrt2 if the transform is rectangular and the
                // size difference is a factor of 2.
                for (int c = 0; c < transformColumnCount; ++c)
                {
                    ref int current = ref Unsafe.Add(ref rowOutput, c);
                    current = Av1Math.RoundShift((long)current * NewSqrt, NewSqrtBitCount);
                }
            }
        }
    }

    /// <summary>
    /// Applies a rounding right shift (positive <paramref name="bit"/>) or a left scale by
    /// <c>2^-bit</c> (negative <paramref name="bit"/>) to <paramref name="size"/> values in place.
    /// </summary>
    /// <param name="arr">Reference to the first value.</param>
    /// <param name="size">Number of values to process.</param>
    /// <param name="bit">Shift amount; zero leaves the values untouched.</param>
    private static void RoundShiftArray(ref int arr, int size, int bit)
    {
        if (bit == 0)
        {
            return;
        }

        nuint count = (nuint)size;
        if (bit > 0)
        {
            for (nuint i = 0; i < count; i++)
            {
                ref int value = ref Unsafe.Add(ref arr, i);
                value = Av1Math.RoundShift(value, bit);
            }

            return;
        }

        int factor = 1 << (-bit);
        for (nuint i = 0; i < count; i++)
        {
            ref int value = ref Unsafe.Add(ref arr, i);
            value *= factor;
        }
    }

    /// <summary>
    /// SVT: get_rect_tx_log_ratio
    /// </summary>
    /// <param name="col">Number of columns.</param>
    /// <param name="row">Number of rows.</param>
    /// <returns>
    /// 0 for square sizes, 1 or 2 when <paramref name="col"/> is two or four times
    /// <paramref name="row"/>, and -1 or -2 for the opposite orientation. Any other
    /// combination is reported through <c>Guard.IsTrue</c>.
    /// </returns>
    public static int GetRectangularRatio(int col, int row)
    {
        if (col == row)
        {
            return 0;
        }

        // Positive results mean "wider than tall", negative the opposite.
        bool isWide = col > row;
        int larger = isWide ? col : row;
        int smaller = isWide ? row : col;
        int sign = isWide ? 1 : -1;

        if (larger == smaller * 2)
        {
            return sign;
        }

        if (larger == smaller * 4)
        {
            return 2 * sign;
        }

        Guard.IsTrue(false, nameof(row), "Unsupported transform size");
        return 0; // Invalid
    }
}
|||
@ -1,15 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
internal class Av1Adst4ForwardTransformer : IAv1ForwardTransformer |
|||
internal class Av1Identity16Forward1dTransformer : IAv1Forward1dTransformer |
|||
{ |
|||
public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange) |
|||
=> throw new NotImplementedException(); |
|||
|
|||
public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber) |
|||
=> throw new NotImplementedException(); |
|||
} |
|||
@ -0,0 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Stub implementation of <see cref="IAv1Forward1dTransformer"/>; the transform is not
/// implemented yet, so every call throws <see cref="NotImplementedException"/>.
/// </summary>
internal class Av1Identity32Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <inheritdoc/>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
|||
@ -0,0 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Stub implementation of <see cref="IAv1Forward1dTransformer"/>; the transform is not
/// implemented yet, so every call throws <see cref="NotImplementedException"/>.
/// </summary>
internal class Av1Identity4Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <inheritdoc/>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
|||
@ -0,0 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Stub implementation of <see cref="IAv1Forward1dTransformer"/>; the transform is not
/// implemented yet, so every call throws <see cref="NotImplementedException"/>.
/// </summary>
internal class Av1Identity64Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <inheritdoc/>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
|||
@ -1,15 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Stub implementation of <see cref="IAv1ForwardTransformer"/>; neither entry point
/// is implemented yet, so every call throws <see cref="NotImplementedException"/>.
/// </summary>
internal class Av1Identity64ForwardTransformer : IAv1ForwardTransformer
{
    /// <inheritdoc/>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <inheritdoc/>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}
|||
@ -0,0 +1,10 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|||
|
|||
/// <summary>
/// Stub implementation of <see cref="IAv1Forward1dTransformer"/>; the transform is not
/// implemented yet, so every call throws <see cref="NotImplementedException"/>.
/// </summary>
internal class Av1Identity8Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <inheritdoc/>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
|||
@ -0,0 +1,19 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform; |
|||
|
|||
/// <summary>
/// Contract for one specific forward transform along a single dimension.
/// </summary>
internal interface IAv1Forward1dTransformer
{
    /// <summary>
    /// Runs the one-dimensional forward transform.
    /// </summary>
    /// <param name="input">Reference to the first input pixel value.</param>
    /// <param name="output">Reference to the first output coefficient.</param>
    /// <param name="cosBit">The cosine bit.</param>
    /// <param name="stageRange">The stage ranges.</param>
    void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange);
}
|||
@ -1,31 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform; |
|||
|
|||
/// <summary>
/// Contract for one specific forward transform function.
/// </summary>
internal interface IAv1ForwardTransformer
{
    /// <summary>
    /// Runs the forward transform.
    /// </summary>
    /// <param name="input">Reference to the first input pixel value.</param>
    /// <param name="output">Reference to the first output coefficient.</param>
    /// <param name="cosBit">The cosine bit.</param>
    /// <param name="stageRange">The stage ranges.</param>
    void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange);

    /// <summary>
    /// Runs the forward transform using <see cref="Avx2"/> instructions.
    /// </summary>
    /// <param name="input">Reference to the first input vector.</param>
    /// <param name="output">Reference to the first output coefficient vector.</param>
    /// <param name="cosBit">The cosine bit.</param>
    /// <param name="columnNumber">The column number to process.</param>
    void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber);
}
|||
Loading…
Reference in new issue