Browse Source

Implemented FDCT8x8 using avx instruction set, added backward compatibility for FDCT8x4 calls using FDCT8x8(ref Block8x8F, ref Block8x8F) method

pull/1632/head
Dmitry Pentin 5 years ago
parent
commit
e5188fe4f4
  1. 120
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

120
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

@ -3,6 +3,10 @@
using System.Numerics;
using System.Runtime.CompilerServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
// ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
@ -38,6 +42,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
private const float C_0_765367 = 0.765366865f;
private const float C_0_125 = 0.1250f;
#if SUPPORTS_RUNTIME_INTRINSICS
// Broadcast (all eight lanes equal) multipliers used by FDCT8x8_Avx.
// They are created once so the hot path only has to load them.
private static readonly Vector256<float> C_V_0_5411 = Vector256.Create(0.541196f);
private static readonly Vector256<float> C_V_1_3065 = Vector256.Create(1.306563f);
private static readonly Vector256<float> C_V_1_1758 = Vector256.Create(1.175876f);
private static readonly Vector256<float> C_V_0_7856 = Vector256.Create(0.785695f);
private static readonly Vector256<float> C_V_1_3870 = Vector256.Create(1.387040f);
private static readonly Vector256<float> C_V_0_2758 = Vector256.Create(0.275899f);

// Must be readonly like its siblings: it is a constant and is never reassigned.
private static readonly Vector256<float> C_V_InvSqrt2 = Vector256.Create(0.707107f);
#endif
#pragma warning restore SA1310 // FieldNamesMustNotContainUnderscore
private static readonly Vector4 InvSqrt2 = new Vector4(0.707107f);
@ -308,12 +323,107 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
d.V7R = c0 - c3;
}
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// AVX implementation of a single forward DCT pass over an 8x8 block.
/// Each row of the block is processed as one <see cref="Vector256{T}"/>, so the
/// butterfly below combines rows element-wise and handles all eight columns at once.
/// </summary>
/// <remarks>
/// Every row of <paramref name="s"/> is read before anything is stored into <paramref name="d"/>.
/// When FMA is supported the multiply/add pairs are fused; otherwise plain AVX
/// multiply and add/subtract instructions are used.
/// </remarks>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
private static void FDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d)
{
    // Stage 1: sums and differences of the mirrored row pairs 0/7, 1/6, 2/5, 3/4.
    Vector256<float> sum07 = Avx.Add(s.V0, s.V7);
    Vector256<float> diff07 = Avx.Subtract(s.V0, s.V7);
    Vector256<float> sum16 = Avx.Add(s.V1, s.V6);
    Vector256<float> diff16 = Avx.Subtract(s.V1, s.V6);
    Vector256<float> sum25 = Avx.Add(s.V2, s.V5);
    Vector256<float> diff25 = Avx.Subtract(s.V2, s.V5);
    Vector256<float> sum34 = Avx.Add(s.V3, s.V4);
    Vector256<float> diff34 = Avx.Subtract(s.V3, s.V4);

    // Output rows 0 and 4 come straight from the sums.
    Vector256<float> outerSum = Avx.Add(sum07, sum34);
    Vector256<float> innerSum = Avx.Add(sum16, sum25);
    d.V0 = Avx.Add(outerSum, innerSum);
    d.V4 = Avx.Subtract(outerSum, innerSum);

    Vector256<float> outerDiff = Avx.Subtract(sum07, sum34);
    Vector256<float> innerDiff = Avx.Subtract(sum16, sum25);

    // Intermediate values for the odd output rows (1, 3, 5, 7).
    Vector256<float> r47Add;
    Vector256<float> r47Sub;
    Vector256<float> r56Add;
    Vector256<float> r56Sub;

    if (Fma.IsSupported)
    {
        // Output rows 2 and 6.
        d.V2 = Fma.MultiplyAdd(innerDiff, C_V_0_5411, Avx.Multiply(outerDiff, C_V_1_3065));
        d.V6 = Fma.MultiplySubtract(outerDiff, C_V_0_5411, Avx.Multiply(innerDiff, C_V_1_3065));

        r47Add = Fma.MultiplyAdd(diff34, C_V_1_1758, Avx.Multiply(diff07, C_V_0_7856));
        r47Sub = Fma.MultiplySubtract(diff07, C_V_1_1758, Avx.Multiply(diff34, C_V_0_7856));

        r56Add = Fma.MultiplyAdd(diff25, C_V_1_3870, Avx.Multiply(C_V_0_2758, diff16));
        r56Sub = Fma.MultiplySubtract(diff16, C_V_1_3870, Avx.Multiply(C_V_0_2758, diff25));
    }
    else
    {
        // Output rows 2 and 6.
        d.V2 = Avx.Add(Avx.Multiply(innerDiff, C_V_0_5411), Avx.Multiply(outerDiff, C_V_1_3065));
        d.V6 = Avx.Subtract(Avx.Multiply(outerDiff, C_V_0_5411), Avx.Multiply(innerDiff, C_V_1_3065));

        r47Add = Avx.Add(Avx.Multiply(diff34, C_V_1_1758), Avx.Multiply(diff07, C_V_0_7856));
        r47Sub = Avx.Subtract(Avx.Multiply(diff07, C_V_1_1758), Avx.Multiply(diff34, C_V_0_7856));

        r56Add = Avx.Add(Avx.Multiply(diff25, C_V_1_3870), Avx.Multiply(C_V_0_2758, diff16));
        r56Sub = Avx.Subtract(Avx.Multiply(diff16, C_V_1_3870), Avx.Multiply(C_V_0_2758, diff25));
    }

    // Output rows 3 and 5.
    d.V3 = Avx.Subtract(r47Sub, r56Add);
    d.V5 = Avx.Subtract(r47Add, r56Sub);

    // Output rows 1 and 7: both partial sums are scaled by 1/sqrt(2) first.
    Vector256<float> scaledSub = Avx.Multiply(Avx.Add(r47Sub, r56Add), C_V_InvSqrt2);
    Vector256<float> scaledAdd = Avx.Multiply(Avx.Add(r47Add, r56Sub), C_V_InvSqrt2);
    d.V1 = Avx.Add(scaledSub, scaledAdd);
    d.V7 = Avx.Subtract(scaledSub, scaledAdd);
}
#endif
/// <summary>
/// Performs 8x8 matrix Forward Discrete Cosine Transform,
/// reading from <paramref name="s"/> and writing the result into <paramref name="d"/>.
/// </summary>
/// <remarks>
/// Dispatches to the AVX implementation when the current hardware supports it,
/// otherwise runs the two 8x4 half-block routines (left part, then right part).
/// No temporary block is needed by this method.
/// </remarks>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void FDCT8x8(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx.IsSupported)
    {
        FDCT8x8_Avx(ref s, ref d);
        return;
    }
#endif

    // Fallback used when intrinsics are compiled out or AVX is unavailable.
    FDCT8x4_LeftPart(ref s, ref d);
    FDCT8x4_RightPart(ref s, ref d);
}
/// <summary>
/// Apply floating point FDCT from src into dest
/// </summary>
/// <remarks></remarks>
/// <param name="src">Source</param>
/// <param name="dest">Destination</param>
/// <param name="temp">Temporary block provided by the caller</param>
/// <param name="temp">Temporary block provided by the caller for optimization</param>
/// <param name="offsetSourceByNeg128">If true, a constant -128.0 offset is applied for all values before FDCT </param>
public static void TransformFDCT(
ref Block8x8F src,
@ -327,13 +437,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
temp.AddInPlace(-128F);
}
FDCT8x4_LeftPart(ref temp, ref dest);
FDCT8x4_RightPart(ref temp, ref dest);
FDCT8x8(ref temp, ref dest);
dest.TransposeInto(ref temp);
FDCT8x4_LeftPart(ref temp, ref dest);
FDCT8x4_RightPart(ref temp, ref dest);
FDCT8x8(ref temp, ref dest);
dest.MultiplyInPlace(C_0_125);
}

Loading…
Cancel
Save