mirror of https://github.com/SixLabors/ImageSharp
26 changed files with 453 additions and 315 deletions
@ -1,15 +1,10 @@ |
|||||
// Copyright (c) Six Labors.
|
// Copyright (c) Six Labors.
|
||||
// Licensed under the Six Labors Split License.
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
// Removed (left-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Identity16ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}

// Added (right-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1Forward1dTransformer"/>; the transform is not implemented.
/// </summary>
internal class Av1Adst16Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -1,15 +0,0 @@ |
|||||
// Copyright (c) Six Labors.
|
|
||||
// Licensed under the Six Labors Split License.
|
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|
||||
|
|
||||
/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Adst16ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}
|
||||
@ -1,15 +1,10 @@ |
|||||
// Copyright (c) Six Labors.
|
// Copyright (c) Six Labors.
|
||||
// Licensed under the Six Labors Split License.
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
// Removed (left-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Identity32ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}

// Added (right-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1Forward1dTransformer"/>; the transform is not implemented.
/// </summary>
internal class Av1Adst32Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -1,15 +0,0 @@ |
|||||
// Copyright (c) Six Labors.
|
|
||||
// Licensed under the Six Labors Split License.
|
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|
||||
|
|
||||
/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Adst32ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}
|
||||
@ -1,15 +1,10 @@ |
|||||
// Copyright (c) Six Labors.
|
// Copyright (c) Six Labors.
|
||||
// Licensed under the Six Labors Split License.
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
// Removed (left-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Identity4ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}

// Added (right-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1Forward1dTransformer"/>; the transform is not implemented.
/// </summary>
internal class Av1Adst4Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -1,15 +1,10 @@ |
|||||
// Copyright (c) Six Labors.
|
// Copyright (c) Six Labors.
|
||||
// Licensed under the Six Labors Split License.
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
// Removed (left-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Identity8ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}

// Added (right-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1Forward1dTransformer"/>; the transform is not implemented.
/// </summary>
internal class Av1Adst8Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -1,15 +1,10 @@ |
|||||
// Copyright (c) Six Labors.
|
// Copyright (c) Six Labors.
|
||||
// Licensed under the Six Labors Split License.
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
// Removed (left-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Dct8ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}

// Added (right-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1Forward1dTransformer"/>; the transform is not implemented.
/// </summary>
internal class Av1Dct16Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -1,15 +0,0 @@ |
|||||
// Copyright (c) Six Labors.
|
|
||||
// Licensed under the Six Labors Split License.
|
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|
||||
|
|
||||
/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Dct16ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}
|
||||
@ -1,15 +1,10 @@ |
|||||
// Copyright (c) Six Labors.
|
// Copyright (c) Six Labors.
|
||||
// Licensed under the Six Labors Split License.
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
// Removed (left-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Adst8ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}

// Added (right-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1Forward1dTransformer"/>; the transform is not implemented.
/// </summary>
internal class Av1Dct32Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -0,0 +1,67 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
|
using System.Runtime.CompilerServices; |
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
|
/// <summary>
/// Scalar 4-point forward DCT, usable as the column or row stage of a 2-D forward transform.
/// </summary>
internal class Av1Dct4Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Transforms the four values starting at <paramref name="input"/> into four
    /// coefficients starting at <paramref name="output"/>.
    /// </summary>
    /// <param name="input">Reference to the first of four consecutive input values.</param>
    /// <param name="output">
    /// Reference to the first of four consecutive output slots. The slots are also used as
    /// scratch space between stages, so they must not overlap <paramref name="input"/>.
    /// </param>
    /// <param name="cosBit">Fixed-point precision; selects the cosine table and the rounding shift.</param>
    /// <param name="stageRange">Not used by this implementation.</param>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
        => TransformScalar(ref input, ref output, cosBit);

    /// <summary>
    /// Three-stage butterfly implementation of the 4-point DCT.
    /// </summary>
    private static void TransformScalar(ref int input, ref int output, int cosBit)
    {
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        ref int output1 = ref Unsafe.Add(ref output, 1);
        ref int output2 = ref Unsafe.Add(ref output, 2);
        ref int output3 = ref Unsafe.Add(ref output, 3);

        // stage 1: sums and differences of the mirrored input pairs.
        output = input + Unsafe.Add(ref input, 3);
        output1 = Unsafe.Add(ref input, 1) + Unsafe.Add(ref input, 2);
        output2 = -Unsafe.Add(ref input, 2) + Unsafe.Add(ref input, 1);
        output3 = -Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 0);

        // stage 2: rotations. The results are held in plain locals (no heap buffer) because
        // every stage-1 value must be read before any output slot is overwritten.
        int step0 = HalfBtf(cospi[32], output, cospi[32], output1, cosBit);
        int step1 = HalfBtf(-cospi[32], output1, cospi[32], output, cosBit);
        int step2 = HalfBtf(cospi[48], output2, cospi[16], output3, cosBit);
        int step3 = HalfBtf(cospi[48], output3, -cospi[16], output2, cosBit);

        // stage 3: store in the final coefficient order (step1 and step2 swap places).
        output = step0;
        output1 = step2;
        output2 = step1;
        output3 = step3;
    }

    /// <summary>
    /// Computes <c>(w0 * in0 + w1 * in1 + 2^(bit - 1)) >> bit</c>, i.e. a rounded fixed-point
    /// weighted sum of two inputs.
    /// </summary>
    private static int HalfBtf(int w0, int in0, int w1, int in1, int bit)
    {
        // Each product is formed in 32-bit arithmetic and only then widened; this is the
        // original expression and is kept unchanged on purpose.
        long result64 = (long)(w0 * in0) + (w1 * in1);
        long intermediate = result64 + (1L << (bit - 1));

        // NOTE(david.barker): The value 'result_64' may not necessarily fit
        // into 32 bits. However, the result of this function is nominally
        // ROUND_POWER_OF_TWO_64(result_64, bit)
        // and that is required to fit into stage_range[stage] many bits
        // (checked by range_check_buf()).
        //
        // Here we've unpacked that rounding operation, and it can be shown
        // that the value of 'intermediate' here *does* fit into 32 bits
        // for any conformant bitstream.
        // The upshot is that, if you do all this calculation using
        // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic,
        // then you'll still get the correct result.
        return (int)(intermediate >> bit);
    }
}
||||
@ -1,138 +0,0 @@ |
|||||
// Copyright (c) Six Labors.
|
|
||||
// Licensed under the Six Labors Split License.
|
|
||||
|
|
||||
using System.Runtime.CompilerServices; |
|
||||
using System.Runtime.Intrinsics; |
|
||||
using System.Runtime.Intrinsics.X86; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|
||||
|
|
||||
/// <summary>
/// 4-point forward DCT with a scalar implementation and an SSE implementation that
/// processes a 4x4 block of 32-bit values.
/// </summary>
internal class Av1Dct4ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Transforms the four values starting at <paramref name="input"/> into four
    /// coefficients starting at <paramref name="output"/>.
    /// </summary>
    /// <param name="input">Reference to the first of four consecutive input values.</param>
    /// <param name="output">
    /// Reference to the first of four consecutive output slots. The slots are also used as
    /// scratch space between stages, so they must not overlap <paramref name="input"/>.
    /// </param>
    /// <param name="cosBit">Fixed-point precision; selects the cosine table and the rounding shift.</param>
    /// <param name="stageRange">Not used by this implementation.</param>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        ref int output1 = ref Unsafe.Add(ref output, 1);
        ref int output2 = ref Unsafe.Add(ref output, 2);
        ref int output3 = ref Unsafe.Add(ref output, 3);

        // stage 1: sums and differences of the mirrored input pairs.
        output = input + Unsafe.Add(ref input, 3);
        output1 = Unsafe.Add(ref input, 1) + Unsafe.Add(ref input, 2);
        output2 = -Unsafe.Add(ref input, 2) + Unsafe.Add(ref input, 1);
        output3 = -Unsafe.Add(ref input, 3) + Unsafe.Add(ref input, 0);

        // stage 2: rotations. The results are held in plain locals (no heap buffer) because
        // every stage-1 value must be read before any output slot is overwritten.
        int step0 = HalfBtf(cospi[32], output, cospi[32], output1, cosBit);
        int step1 = HalfBtf(-cospi[32], output1, cospi[32], output, cosBit);
        int step2 = HalfBtf(cospi[48], output2, cospi[16], output3, cosBit);
        int step3 = HalfBtf(cospi[48], output3, -cospi[16], output2, cosBit);

        // stage 3: store in the final coefficient order (step1 and step2 swap places).
        output = step0;
        output1 = step2;
        output2 = step1;
        output3 = step3;
    }

    /// <summary>
    /// Not available for this block size; always throws.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
        => throw new NotImplementedException("Too small block for Vector implementation, use TransformSse() method instead.");

    /// <summary>
    /// SVT: fdct4x4_sse4_1
    /// </summary>
    /// <param name="input">Reference to the first input vector; the other three are read at multiples of <paramref name="columnNumber"/>.</param>
    /// <param name="output">Reference to the first of four consecutive output vectors, written transposed.</param>
    /// <param name="cosBit">Fixed-point precision; selects the cosine table and the rounding shift.</param>
    /// <param name="columnNumber">Distance, in vectors, between consecutive input vectors.</param>
    /// <remarks>
    /// Uses <see cref="Sse2"/> and <see cref="Sse41"/> instructions without checking for
    /// hardware support; the caller has to do that.
    /// </remarks>
    public static void TransformSse(ref Vector128<int> input, ref Vector128<int> output, byte cosBit, int columnNumber)
    {
#pragma warning disable CA1857 // A constant is expected for the parameter

        // We only use stage-2 bit;
        // shift[0] is used in load_buffer_4x4()
        // shift[1] is used in txfm_func_col()
        // shift[2] is used in txfm_func_row()
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        Vector128<int> cospi32 = Vector128.Create<int>(cospi[32]);
        Vector128<int> cospi48 = Vector128.Create<int>(cospi[48]);
        Vector128<int> cospi16 = Vector128.Create<int>(cospi[16]);
        Vector128<int> rnding = Vector128.Create<int>(1 << (cosBit - 1));

        // Read all four inputs up front.
        Vector128<int> in0 = input;
        Vector128<int> in1 = Unsafe.Add(ref input, columnNumber);
        Vector128<int> in2 = Unsafe.Add(ref input, 2 * columnNumber);
        Vector128<int> in3 = Unsafe.Add(ref input, 3 * columnNumber);

        // Stage 1: sums and differences of the mirrored pairs.
        Vector128<int> s0 = Sse2.Add(in0, in3);
        Vector128<int> s3 = Sse2.Subtract(in0, in3);
        Vector128<int> s1 = Sse2.Add(in1, in2);
        Vector128<int> s2 = Sse2.Subtract(in1, in2);

        // btf_32_sse4_1_type0(cospi32, cospi32, s[01], u[02], bit);
        Vector128<int> p0 = Sse41.MultiplyLow(s0, cospi32);
        Vector128<int> p1 = Sse41.MultiplyLow(s1, cospi32);
        Vector128<int> u0 = Sse2.ShiftRightArithmetic(Sse2.Add(Sse2.Add(p0, p1), rnding), cosBit);
        Vector128<int> u2 = Sse2.ShiftRightArithmetic(Sse2.Add(Sse2.Subtract(p0, p1), rnding), cosBit);

        // btf_32_sse4_1_type1(cospi48, cospi16, s[23], u[13], bit);
        Vector128<int> q0 = Sse2.Add(Sse41.MultiplyLow(s2, cospi48), Sse41.MultiplyLow(s3, cospi16));
        Vector128<int> u1 = Sse2.ShiftRightArithmetic(Sse2.Add(q0, rnding), cosBit);

        Vector128<int> q1 = Sse2.Subtract(Sse41.MultiplyLow(s3, cospi48), Sse41.MultiplyLow(s2, cospi16));
        Vector128<int> u3 = Sse2.ShiftRightArithmetic(Sse2.Add(q1, rnding), cosBit);

        // Note: shift[1] and shift[2] are zeros

        // Transpose 4x4 32-bit: interleave 32-bit lanes of (u0, u1) and (u2, u3),
        // then combine the 64-bit halves.
        Vector128<long> lo01 = Sse2.UnpackLow(u0, u1).AsInt64();
        Vector128<long> hi01 = Sse2.UnpackHigh(u0, u1).AsInt64();
        Vector128<long> lo23 = Sse2.UnpackLow(u2, u3).AsInt64();
        Vector128<long> hi23 = Sse2.UnpackHigh(u2, u3).AsInt64();

        output = Sse2.UnpackLow(lo01, lo23).AsInt32();
        Unsafe.Add(ref output, 1) = Sse2.UnpackHigh(lo01, lo23).AsInt32();
        Unsafe.Add(ref output, 2) = Sse2.UnpackLow(hi01, hi23).AsInt32();
        Unsafe.Add(ref output, 3) = Sse2.UnpackHigh(hi01, hi23).AsInt32();
#pragma warning restore CA1857 // A constant is expected for the parameter
    }

    /// <summary>
    /// Computes <c>(w0 * in0 + w1 * in1 + 2^(bit - 1)) >> bit</c>, i.e. a rounded fixed-point
    /// weighted sum of two inputs.
    /// </summary>
    private static int HalfBtf(int w0, int in0, int w1, int in1, int bit)
    {
        // Each product is formed in 32-bit arithmetic and only then widened; this is the
        // original expression and is kept unchanged on purpose.
        long result64 = (long)(w0 * in0) + (w1 * in1);
        long intermediate = result64 + (1L << (bit - 1));

        // NOTE(david.barker): The value 'result_64' may not necessarily fit
        // into 32 bits. However, the result of this function is nominally
        // ROUND_POWER_OF_TWO_64(result_64, bit)
        // and that is required to fit into stage_range[stage] many bits
        // (checked by range_check_buf()).
        //
        // Here we've unpacked that rounding operation, and it can be shown
        // that the value of 'intermediate' here *does* fit into 32 bits
        // for any conformant bitstream.
        // The upshot is that, if you do all this calculation using
        // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic,
        // then you'll still get the correct result.
        return (int)(intermediate >> bit);
    }
}
|
||||
@ -0,0 +1,10 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
|
/// <summary>
/// Placeholder <see cref="IAv1Forward1dTransformer"/>; the transform is not implemented.
/// </summary>
internal class Av1Dct64Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -1,15 +0,0 @@ |
|||||
// Copyright (c) Six Labors.
|
|
||||
// Licensed under the Six Labors Split License.
|
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|
||||
|
|
||||
/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Dct64ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}
|
||||
@ -1,15 +1,10 @@ |
|||||
// Copyright (c) Six Labors.
|
// Copyright (c) Six Labors.
|
||||
// Licensed under the Six Labors Split License.
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
// Removed (left-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Dct32ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}

// Added (right-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1Forward1dTransformer"/>; the transform is not implemented.
/// </summary>
internal class Av1Dct8Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -0,0 +1,100 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
|
using System.Runtime.CompilerServices; |
||||
|
using System.Runtime.Intrinsics; |
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
|
/// <summary>
/// 2-D forward transform for 4x4 blocks that applies the 4-point DCT to both columns and rows.
/// </summary>
/// <remarks>
/// Each instance owns a scratch buffer that <see cref="Transform"/> writes to, so concurrent
/// calls on the same instance would share that buffer.
/// </remarks>
internal class Av1DctDct4Forward2dTransformer : Av1Forward2dTransformerBase
{
    private readonly Av1Transform2dFlipConfiguration config = new(Av1TransformType.DctDct, Av1TransformSize.Size4x4);
    private readonly Av1Dct4Forward1dTransformer transformer = new();

    // Scratch buffer handed to Transform2dCore as its intermediate (column-pass) storage.
    private readonly int[] temp = new int[Av1Constants.MaxTransformSize * Av1Constants.MaxTransformSize];

    /// <summary>
    /// Runs the scalar 2-D transform over a 4x4 block.
    /// </summary>
    /// <param name="input">Reference to the first input sample; rows are read with a stride of 4.</param>
    /// <param name="output">Reference to the first output coefficient.</param>
    /// <param name="cosBit">Currently unused; the cosine bits come from the transform configuration.</param>
    /// <param name="columnNumber">Currently unused by the scalar path.</param>
    public void Transform(ref short input, ref int output, int cosBit, int columnNumber)
    {
        // TODO: dispatch to TransformVector when hardware acceleration is available.
        // The vector path is not wired up yet, so only the scalar core is used.
        // Stride 4 and bit depth 8 are fixed here.
        Transform2dCore(this.transformer, this.transformer, ref input, 4, ref output, this.config, ref this.temp[0], 8);
    }

    /// <summary>
    /// SVT: fdct4x4_sse4_1
    /// </summary>
    /// <param name="input">Reference to the first input vector; the other three are read at multiples of <paramref name="columnNumber"/>.</param>
    /// <param name="output">Reference to the first of four consecutive output vectors, written transposed.</param>
    /// <param name="cosBit">Fixed-point precision; selects the cosine table and the rounding shift.</param>
    /// <param name="columnNumber">Distance, in vectors, between consecutive input vectors.</param>
    private static void TransformVector(ref Vector128<int> input, ref Vector128<int> output, int cosBit, int columnNumber)
    {
        // We only use stage-2 bit;
        // shift[0] is used in load_buffer_4x4()
        // shift[1] is used in txfm_func_col()
        // shift[2] is used in txfm_func_row()
        Span<int> cospi = Av1SinusConstants.CosinusPi(cosBit);
        Vector128<int> cospi32 = Vector128.Create<int>(cospi[32]);
        Vector128<int> cospi48 = Vector128.Create<int>(cospi[48]);
        Vector128<int> cospi16 = Vector128.Create<int>(cospi[16]);
        Vector128<int> rnding = Vector128.Create<int>(1 << (cosBit - 1));

        // Shuffle indices / mask used by the transpose at the end.
        Vector256<int> interleave32 = Vector256.Create(0, 4, 1, 5, 2, 6, 3, 7);
        Vector256<int> swap64 = Vector256.Create(2, 3, 0, 1, 6, 7, 4, 5);
        Vector256<int> select64 = Vector256.Create(0, 0, -1, -1, 0, 0, -1, -1);

        // Read all four inputs up front.
        Vector128<int> in0 = input;
        Vector128<int> in1 = Unsafe.Add(ref input, columnNumber);
        Vector128<int> in2 = Unsafe.Add(ref input, 2 * columnNumber);
        Vector128<int> in3 = Unsafe.Add(ref input, 3 * columnNumber);

        // Stage 1: sums and differences of the mirrored pairs.
        Vector128<int> s0 = Vector128.Add(in0, in3);
        Vector128<int> s3 = Vector128.Subtract(in0, in3);
        Vector128<int> s1 = Vector128.Add(in1, in2);
        Vector128<int> s2 = Vector128.Subtract(in1, in2);

        // btf_32_sse4_1_type0(cospi32, cospi32, s[01], u[02], bit);
        Vector128<int> p0 = Vector128.Multiply(s0, cospi32);
        Vector128<int> p1 = Vector128.Multiply(s1, cospi32);
        Vector128<int> u0 = Vector128.ShiftRightArithmetic(Vector128.Add(Vector128.Add(p0, p1), rnding), cosBit);
        Vector128<int> u2 = Vector128.ShiftRightArithmetic(Vector128.Add(Vector128.Subtract(p0, p1), rnding), cosBit);

        // btf_32_sse4_1_type1(cospi48, cospi16, s[23], u[13], bit);
        Vector128<int> q0 = Vector128.Add(Vector128.Multiply(s2, cospi48), Vector128.Multiply(s3, cospi16));
        Vector128<int> u1 = Vector128.ShiftRightArithmetic(Vector128.Add(q0, rnding), cosBit);

        Vector128<int> q1 = Vector128.Subtract(Vector128.Multiply(s3, cospi48), Vector128.Multiply(s2, cospi16));
        Vector128<int> u3 = Vector128.ShiftRightArithmetic(Vector128.Add(q1, rnding), cosBit);

        // Note: shift[1] and shift[2] are zeros

        // Transpose 4x4 32-bit.
        // With a = u0, b = u1, c = u2, d = u3:
        //   ab = a0 b0 a1 b1 | a2 b2 a3 b3
        //   cd = c0 d0 c1 d1 | c2 d2 c3 d3
        Vector256<int> ab = Vector256.Shuffle(Vector256.Create(u0, u1), interleave32);
        Vector256<int> cd = Vector256.Shuffle(Vector256.Create(u2, u3), interleave32);

        // swap64 exchanges the two 64-bit pairs inside each 128-bit half, so the pairs that
        // belong together line up before the select:
        //   even = a0 b0 c0 d0 | a2 b2 c2 d2
        //   odd  = a1 b1 c1 d1 | a3 b3 c3 d3
        Vector256<int> even = Vector256.ConditionalSelect(select64, Vector256.Shuffle(cd, swap64), ab);
        Vector256<int> odd = Vector256.ConditionalSelect(select64, cd, Vector256.Shuffle(ab, swap64));

        output = Vector256.GetLower(even);
        Unsafe.Add(ref output, 1) = Vector256.GetLower(odd);
        Unsafe.Add(ref output, 2) = Vector256.GetUpper(even);
        Unsafe.Add(ref output, 3) = Vector256.GetUpper(odd);
    }
}
||||
@ -0,0 +1,186 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
|
using System.Runtime.CompilerServices; |
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
|
/// <summary>
/// Shared core for 2-D forward transforms: a column pass, followed by a row pass, each
/// delegated to a 1-D transformer.
/// </summary>
internal abstract class Av1Forward2dTransformerBase
{
    // Fixed-point scale applied to 2:1 rectangular transforms: 5793 / 2^12 is roughly sqrt(2).
    private const int NewSqrt = 5793;
    private const int NewSqrtBitCount = 12;

    /// <summary>
    /// SVT: av1_tranform_two_d_core_c
    /// </summary>
    /// <typeparam name="TColumn">Type of the 1-D transformer applied to each column.</typeparam>
    /// <typeparam name="TRow">Type of the 1-D transformer applied to each row.</typeparam>
    /// <param name="transformFunctionColumn">1-D transformer applied to each column.</param>
    /// <param name="transformFunctionRow">1-D transformer applied to each row.</param>
    /// <param name="input">Reference to the first input sample.</param>
    /// <param name="inputStride">Distance, in samples, between consecutive input rows.</param>
    /// <param name="output">
    /// Reference to the first output coefficient. During the column pass the first
    /// 2 x height entries are also used as temporary storage.
    /// </param>
    /// <param name="config">Supplies the transform size, shifts, cosine bits and flip flags.</param>
    /// <param name="buf">Reference to an intermediate buffer of at least width x height entries.</param>
    /// <param name="bitDepth">Passed on to <c>config.GenerateStageRange</c>.</param>
    protected static void Transform2dCore<TColumn, TRow>(TColumn transformFunctionColumn, TRow transformFunctionRow, ref short input, uint inputStride, ref int output, Av1Transform2dFlipConfiguration config, ref int buf, int bitDepth)
        where TColumn : IAv1Forward1dTransformer
        where TRow : IAv1Forward1dTransformer
    {
        // Note when assigning txfm_size_col, we use the txfm_size from the
        // row configuration and vice versa. This is intentionally done to
        // accurately perform rectangular transforms. When the transform is
        // rectangular, the number of columns will be the same as the
        // txfm_size stored in the row cfg struct. It will make no difference
        // for square transforms.
        int transformColumnCount = config.TransformSize.GetWidth();
        int transformRowCount = config.TransformSize.GetHeight();

        // Take the shift from the larger dimension in the rectangular case.
        Span<int> shift = config.Shift;
        int rectangleType = GetRectangularRatio(transformColumnCount, transformRowCount);

        // Scratch spans handed to the 1-D transformers; they are not filled in here.
        Span<byte> stageRangeColumn = stackalloc byte[Av1Transform2dFlipConfiguration.MaxStageNumber];
        Span<byte> stageRangeRow = stackalloc byte[Av1Transform2dFlipConfiguration.MaxStageNumber];

        // assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
        // assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
        config.GenerateStageRange(bitDepth);

        int cosBitColumn = config.CosBitColumn;
        int cosBitRow = config.CosBitRow;
        bool flipUpsideDown = config.FlipUpsideDown;
        bool flipLeftToRight = config.FlipLeftToRight;
        nuint stride = inputStride;

        // use output buffer as temp buffer
        ref int tempIn = ref output;
        ref int tempOut = ref Unsafe.Add(ref output, transformRowCount);

        // Columns
        for (int c = 0; c < transformColumnCount; ++c)
        {
            // Gather one column of the input, reading rows bottom-up when flipping upside
            // down. The source index is computed per row, so it never steps below zero.
            for (int r = 0; r < transformRowCount; ++r)
            {
                int sourceRow = flipUpsideDown ? transformRowCount - 1 - r : r;
                Unsafe.Add(ref tempIn, r) = Unsafe.Add(ref input, ((nuint)sourceRow * stride) + (nuint)c);
            }

            RoundShiftArray(ref tempIn, transformRowCount, -shift[0]); // NM svt_av1_round_shift_array_c
            transformFunctionColumn.Transform(ref tempIn, ref tempOut, cosBitColumn, stageRangeColumn);
            RoundShiftArray(ref tempOut, transformRowCount, -shift[1]); // NM svt_av1_round_shift_array_c

            // Scatter the transformed column into the intermediate buffer, mirroring the
            // column position when flipping from left to right.
            int destinationColumn = flipLeftToRight ? transformColumnCount - c - 1 : c;
            for (int r = 0; r < transformRowCount; ++r)
            {
                Unsafe.Add(ref buf, destinationColumn + (r * transformColumnCount)) = Unsafe.Add(ref tempOut, r);
            }
        }

        // Multiply everything by Sqrt2 if the transform is rectangular and the
        // size difference is a factor of 2.
        bool scaleBySqrt2 = Math.Abs(rectangleType) == 1;

        // Rows
        for (int r = 0; r < transformRowCount; ++r)
        {
            int rowOffset = r * transformColumnCount;
            ref int outputRow = ref Unsafe.Add(ref output, rowOffset);

            transformFunctionRow.Transform(ref Unsafe.Add(ref buf, rowOffset), ref outputRow, cosBitRow, stageRangeRow);
            RoundShiftArray(ref outputRow, transformColumnCount, -shift[2]);

            if (scaleBySqrt2)
            {
                for (int c = 0; c < transformColumnCount; ++c)
                {
                    ref int current = ref Unsafe.Add(ref outputRow, c);
                    current = Av1Math.RoundShift((long)current * NewSqrt, NewSqrtBitCount);
                }
            }
        }
    }

    /// <summary>
    /// Applies a shift to <paramref name="size"/> consecutive values in place: a rounding
    /// right shift when <paramref name="bit"/> is positive, a multiplication by
    /// 2^(-<paramref name="bit"/>) when it is negative, and nothing when it is zero.
    /// </summary>
    private static void RoundShiftArray(ref int arr, int size, int bit)
    {
        if (bit == 0)
        {
            return;
        }

        nuint sz = (nuint)size;
        if (bit > 0)
        {
            for (nuint i = 0; i < sz; i++)
            {
                ref int a = ref Unsafe.Add(ref arr, i);
                a = Av1Math.RoundShift(a, bit);
            }

            return;
        }

        int scale = 1 << (-bit);
        for (nuint i = 0; i < sz; i++)
        {
            Unsafe.Add(ref arr, i) *= scale;
        }
    }

    /// <summary>
    /// SVT: get_rect_tx_log_ratio
    /// </summary>
    /// <param name="col">Number of columns of the transform.</param>
    /// <param name="row">Number of rows of the transform.</param>
    /// <returns>
    /// 0 for square sizes; 1 or 2 when <paramref name="col"/> is 2x or 4x <paramref name="row"/>;
    /// -1 or -2 when <paramref name="row"/> is 2x or 4x <paramref name="col"/>.
    /// </returns>
    /// <remarks>
    /// Any other ratio fails the <c>Guard.IsTrue</c> check; should that call return, the result is 0.
    /// </remarks>
    public static int GetRectangularRatio(int col, int row)
    {
        if (col == row)
        {
            return 0;
        }

        // Both orientations follow the same rule; only the sign of the result differs.
        bool wide = col > row;
        int larger = wide ? col : row;
        int smaller = wide ? row : col;
        int sign = wide ? 1 : -1;

        if (larger == smaller * 2)
        {
            return sign;
        }

        if (larger == smaller * 4)
        {
            return 2 * sign;
        }

        Guard.IsTrue(false, nameof(row), "Unsupported transform size");

        return 0; // Invalid
    }
}
||||
@ -1,15 +1,10 @@ |
|||||
// Copyright (c) Six Labors.
|
// Copyright (c) Six Labors.
|
||||
// Licensed under the Six Labors Split License.
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
// Removed (left-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1ForwardTransformer"/>; neither entry point is implemented.
/// </summary>
internal class Av1Adst4ForwardTransformer : IAv1ForwardTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Always throws; no vectorized transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}

// Added (right-hand) side of the diff.

/// <summary>
/// Placeholder <see cref="IAv1Forward1dTransformer"/>; the transform is not implemented.
/// </summary>
internal class Av1Identity16Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -0,0 +1,10 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
|
/// <summary>
/// Placeholder <see cref="IAv1Forward1dTransformer"/>; the transform is not implemented.
/// </summary>
internal class Av1Identity32Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <summary>
    /// Always throws; no transform is performed.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -0,0 +1,10 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
|
/// <summary>
/// Forward 1-dimensional transformer stub for the 4-point identity transform (going by the type name).
/// The transform itself has not been written yet.
/// </summary>
internal class Av1Identity4Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <inheritdoc/>
    /// <exception cref="NotImplementedException">Always thrown; no implementation exists yet.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -0,0 +1,10 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
|
/// <summary>
/// Forward 1-dimensional transformer stub for the 64-point identity transform (going by the type name).
/// The transform itself has not been written yet.
/// </summary>
internal class Av1Identity64Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <inheritdoc/>
    /// <exception cref="NotImplementedException">Always thrown; no implementation exists yet.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -1,15 +0,0 @@ |
|||||
// Copyright (c) Six Labors.
|
|
||||
// Licensed under the Six Labors Split License.
|
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
|
||||
|
|
||||
/// <summary>
/// Forward transformer stub for the 64-point identity transform (going by the type name).
/// Neither the scalar nor the AVX2 entry point has been written yet.
/// </summary>
internal class Av1Identity64ForwardTransformer : IAv1ForwardTransformer
{
    /// <inheritdoc/>
    /// <exception cref="NotImplementedException">Always thrown; no implementation exists yet.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }

    /// <inheritdoc/>
    /// <exception cref="NotImplementedException">Always thrown; no implementation exists yet.</exception>
    public void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber)
    {
        throw new NotImplementedException();
    }
}
|
||||
@ -0,0 +1,10 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform.Forward; |
||||
|
|
||||
|
/// <summary>
/// Forward 1-dimensional transformer stub for the 8-point identity transform (going by the type name).
/// The transform itself has not been written yet.
/// </summary>
internal class Av1Identity8Forward1dTransformer : IAv1Forward1dTransformer
{
    /// <inheritdoc/>
    /// <exception cref="NotImplementedException">Always thrown; no implementation exists yet.</exception>
    public void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange)
    {
        throw new NotImplementedException();
    }
}
||||
@ -0,0 +1,19 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform; |
||||
|
|
||||
|
/// <summary>
/// Contract for one specific forward transform function operating in a single dimension.
/// </summary>
internal interface IAv1Forward1dTransformer
{
    /// <summary>
    /// Runs the 1-dimensional forward transformation.
    /// </summary>
    /// <param name="input">
    /// Reference to the input pixels (presumably the first element of a contiguous run; confirm with callers).
    /// </param>
    /// <param name="output">
    /// Reference to the output coefficients (presumably the first element of a contiguous run; confirm with callers).
    /// </param>
    /// <param name="cosBit">The cosine bit.</param>
    /// <param name="stageRange">The stage ranges.</param>
    void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange);
}
||||
@ -1,31 +0,0 @@ |
|||||
// Copyright (c) Six Labors.
|
|
||||
// Licensed under the Six Labors Split License.
|
|
||||
|
|
||||
using System.Runtime.Intrinsics; |
|
||||
using System.Runtime.Intrinsics.X86; |
|
||||
|
|
||||
namespace SixLabors.ImageSharp.Formats.Heif.Av1.Transform; |
|
||||
|
|
||||
/// <summary>
/// Contract for one specific forward transform function, with a scalar and an AVX2 entry point.
/// </summary>
internal interface IAv1ForwardTransformer
{
    /// <summary>
    /// Runs the forward transformation.
    /// </summary>
    /// <param name="input">
    /// Reference to the input pixels (presumably the first element of a contiguous run; confirm with callers).
    /// </param>
    /// <param name="output">
    /// Reference to the output coefficients (presumably the first element of a contiguous run; confirm with callers).
    /// </param>
    /// <param name="cosBit">The cosine bit.</param>
    /// <param name="stageRange">The stage ranges.</param>
    void Transform(ref int input, ref int output, int cosBit, Span<byte> stageRange);

    /// <summary>
    /// Runs the forward transformation using <see cref="Avx2"/> instructions.
    /// </summary>
    /// <param name="input">Reference to the array of input vectors.</param>
    /// <param name="output">Reference to the array of output coefficient vectors.</param>
    /// <param name="cosBit">The cosine bit.</param>
    /// <param name="columnNumber">The number of the column to process.</param>
    void TransformAvx2(ref Vector256<int> input, ref Vector256<int> output, int cosBit, int columnNumber);
}
|
||||
Loading…
Reference in new issue