|
|
|
@ -3,6 +3,10 @@ |
|
|
|
|
|
|
|
using System.Numerics; |
|
|
|
using System.Runtime.CompilerServices; |
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
using System.Runtime.Intrinsics; |
|
|
|
using System.Runtime.Intrinsics.X86; |
|
|
|
#endif
|
|
|
|
|
|
|
|
// ReSharper disable InconsistentNaming
|
|
|
|
namespace SixLabors.ImageSharp.Formats.Jpeg.Components |
|
|
|
@ -38,6 +42,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components |
|
|
|
private const float C_0_765367 = 0.765366865f; |
|
|
|
|
|
|
|
private const float C_0_125 = 0.1250f; |
|
|
|
|
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
// Broadcast (all eight lanes equal) copies of the rotation constants used by FDCT8x8_Avx.
// All of them are initialise-once values, so every field is declared readonly.
private static readonly Vector256<float> C_V_0_5411 = Vector256.Create(0.541196f);
private static readonly Vector256<float> C_V_1_3065 = Vector256.Create(1.306563f);
private static readonly Vector256<float> C_V_1_1758 = Vector256.Create(1.175876f);
private static readonly Vector256<float> C_V_0_7856 = Vector256.Create(0.785695f);
private static readonly Vector256<float> C_V_1_3870 = Vector256.Create(1.387040f);
private static readonly Vector256<float> C_V_0_2758 = Vector256.Create(0.275899f);

// Same value as the Vector4 'InvSqrt2' field, widened to 256 bits.
// Previously declared without 'readonly', which left it accidentally mutable.
private static readonly Vector256<float> C_V_InvSqrt2 = Vector256.Create(0.707107f);
|
|
|
#endif
|
|
|
|
#pragma warning restore SA1310 // FieldNamesMustNotContainUnderscore
|
|
|
|
private static readonly Vector4 InvSqrt2 = new Vector4(0.707107f); |
|
|
|
|
|
|
|
@ -308,12 +323,107 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components |
|
|
|
d.V7R = c0 - c3; |
|
|
|
} |
|
|
|
|
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
/// <summary>
/// AVX implementation of the 8x8 forward DCT step that <c>FDCT8x8</c> dispatches to when
/// <c>Avx.IsSupported</c> is true. Reads the eight vectors V0..V7 of <paramref name="s"/> and
/// writes the eight vectors V0..V7 of <paramref name="d"/>. Fused multiply-add instructions
/// are used for the rotations when <c>Fma.IsSupported</c> is true.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
private static void FDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d)
{
    // Butterfly stage: sums and differences of the mirrored pairs (V0/V7, V1/V6, V2/V5, V3/V4).
    // Every read of 's' happens here, before anything is written to 'd'.
    Vector256<float> sum07 = Avx.Add(s.V0, s.V7);
    Vector256<float> diff07 = Avx.Subtract(s.V0, s.V7);
    Vector256<float> sum16 = Avx.Add(s.V1, s.V6);
    Vector256<float> diff16 = Avx.Subtract(s.V1, s.V6);
    Vector256<float> sum25 = Avx.Add(s.V2, s.V5);
    Vector256<float> diff25 = Avx.Subtract(s.V2, s.V5);
    Vector256<float> sum34 = Avx.Add(s.V3, s.V4);
    Vector256<float> diff34 = Avx.Subtract(s.V3, s.V4);

    // Second butterfly over the four sums.
    Vector256<float> outerSum = Avx.Add(sum07, sum34);
    Vector256<float> innerSum = Avx.Add(sum16, sum25);
    Vector256<float> outerDiff = Avx.Subtract(sum07, sum34);
    Vector256<float> innerDiff = Avx.Subtract(sum16, sum25);

    // Outputs 0 and 4.
    d.V0 = Avx.Add(outerSum, innerSum);
    d.V4 = Avx.Subtract(outerSum, innerSum);

    // Rotated pairs feeding the odd outputs; assigned in exactly one of the branches below.
    Vector256<float> rot47Plus;
    Vector256<float> rot47Minus;
    Vector256<float> rot56Plus;
    Vector256<float> rot56Minus;

    if (Fma.IsSupported)
    {
        // Fma.MultiplyAdd(a, b, c) = (a * b) + c; Fma.MultiplySubtract(a, b, c) = (a * b) - c.

        // Outputs 2 and 6.
        d.V2 = Fma.MultiplyAdd(innerDiff, C_V_0_5411, Avx.Multiply(outerDiff, C_V_1_3065));
        d.V6 = Fma.MultiplySubtract(outerDiff, C_V_0_5411, Avx.Multiply(innerDiff, C_V_1_3065));

        rot47Plus = Fma.MultiplyAdd(diff34, C_V_1_1758, Avx.Multiply(diff07, C_V_0_7856));
        rot47Minus = Fma.MultiplySubtract(diff07, C_V_1_1758, Avx.Multiply(diff34, C_V_0_7856));

        rot56Plus = Fma.MultiplyAdd(diff25, C_V_1_3870, Avx.Multiply(C_V_0_2758, diff16));
        rot56Minus = Fma.MultiplySubtract(diff16, C_V_1_3870, Avx.Multiply(C_V_0_2758, diff25));
    }
    else
    {
        // Outputs 2 and 6.
        d.V2 = Avx.Add(Avx.Multiply(innerDiff, C_V_0_5411), Avx.Multiply(outerDiff, C_V_1_3065));
        d.V6 = Avx.Subtract(Avx.Multiply(outerDiff, C_V_0_5411), Avx.Multiply(innerDiff, C_V_1_3065));

        rot47Plus = Avx.Add(Avx.Multiply(diff34, C_V_1_1758), Avx.Multiply(diff07, C_V_0_7856));
        rot47Minus = Avx.Subtract(Avx.Multiply(diff07, C_V_1_1758), Avx.Multiply(diff34, C_V_0_7856));

        rot56Plus = Avx.Add(Avx.Multiply(diff25, C_V_1_3870), Avx.Multiply(C_V_0_2758, diff16));
        rot56Minus = Avx.Subtract(Avx.Multiply(diff16, C_V_1_3870), Avx.Multiply(C_V_0_2758, diff25));
    }

    // Outputs 3 and 5.
    d.V3 = Avx.Subtract(rot47Minus, rot56Plus);
    d.V5 = Avx.Subtract(rot47Plus, rot56Minus);

    // Outputs 1 and 7: scale the two remaining sums by 1/sqrt(2), then one last butterfly.
    Vector256<float> scaledMinus = Avx.Multiply(Avx.Add(rot47Minus, rot56Plus), C_V_InvSqrt2);
    Vector256<float> scaledPlus = Avx.Multiply(Avx.Add(rot47Plus, rot56Minus), C_V_InvSqrt2);

    d.V1 = Avx.Add(scaledMinus, scaledPlus);
    d.V7 = Avx.Subtract(scaledMinus, scaledPlus);
}
|
|
|
#endif
|
|
|
|
|
|
|
|
/// <summary>
/// Performs 8x8 matrix Forward Discrete Cosine Transform.
/// Uses the AVX implementation when the runtime supports it; otherwise the block is
/// processed by the left and right 8x4 routines.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void FDCT8x8(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx.IsSupported)
    {
        FDCT8x8_Avx(ref s, ref d);
        return;
    }
#endif

    // Fallback path: left part first, then right part.
    FDCT8x4_LeftPart(ref s, ref d);
    FDCT8x4_RightPart(ref s, ref d);
}
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Apply floating point FDCT from src into dest
|
|
|
|
/// </summary>
|
|
|
|
/// <remarks></remarks>
|
|
|
|
/// <param name="src">Source</param>
|
|
|
|
/// <param name="dest">Destination</param>
|
|
|
|
/// <param name="temp">Temporary block provided by the caller for optimization</param>
|
|
|
|
/// <param name="offsetSourceByNeg128">If true, a constant -128.0 offset is applied for all values before FDCT </param>
|
|
|
|
public static void TransformFDCT( |
|
|
|
ref Block8x8F src, |
|
|
|
@ -327,13 +437,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components |
|
|
|
temp.AddInPlace(-128F); |
|
|
|
} |
|
|
|
|
|
|
|
FDCT8x4_LeftPart(ref temp, ref dest); |
|
|
|
FDCT8x4_RightPart(ref temp, ref dest); |
|
|
|
FDCT8x8(ref temp, ref dest); |
|
|
|
|
|
|
|
dest.TransposeInto(ref temp); |
|
|
|
|
|
|
|
FDCT8x4_LeftPart(ref temp, ref dest); |
|
|
|
FDCT8x4_RightPart(ref temp, ref dest); |
|
|
|
FDCT8x8(ref temp, ref dest); |
|
|
|
|
|
|
|
dest.MultiplyInPlace(C_0_125); |
|
|
|
} |
|
|
|
|