Browse Source

Made DCT code prettier with SimdUtils, added summaries to the 8x8 DCT methods, added debug assertions

pull/1632/head
Dmitry Pentin 5 years ago
parent
commit
a8f717d781
  1. 59
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs
  2. 19
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

59
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs

@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
#if SUPPORTS_RUNTIME_INTRINSICS
@ -171,14 +172,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
d.V4R = my3 - mb3;
}
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Performs the IDCT internal operations on the given block.
/// Combined operation of <see cref="IDCT8x4_LeftPart(ref Block8x8F, ref Block8x8F)"/> and <see cref="IDCT8x4_RightPart(ref Block8x8F, ref Block8x8F)"/>
/// using AVX instructions.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void IDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
Debug.Assert(Avx.IsSupported, "AVX is required to execute this method");
Vector256<float> my1 = s.V1;
Vector256<float> my7 = s.V7;
Vector256<float> mz0 = Avx.Add(my1, my7);
@ -191,40 +195,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
Vector256<float> mz4 = Avx.Multiply(Avx.Add(mz0, mz1), C_V_1_1758);
if (Fma.IsSupported)
{
mz2 = Fma.MultiplyAdd(mz2, C_V_n1_9615, mz4);
mz3 = Fma.MultiplyAdd(mz3, C_V_n0_3901, mz4);
}
else
{
mz2 = Avx.Add(Avx.Multiply(mz2, C_V_n1_9615), mz4);
mz3 = Avx.Add(Avx.Multiply(mz3, C_V_n0_3901), mz4);
}
mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz2, C_V_n1_9615);
mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz3, C_V_n0_3901);
mz0 = Avx.Multiply(mz0, C_V_n0_8999);
mz1 = Avx.Multiply(mz1, C_V_n2_5629);
Vector256<float> mb3 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my7, C_V_0_2986), mz2);
Vector256<float> mb2 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my5, C_V_2_0531), mz3);
Vector256<float> mb1 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my3, C_V_3_0727), mz2);
Vector256<float> mb0 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my1, C_V_1_5013), mz3);
Unsafe.SkipInit(out Vector256<float> mb3);
Unsafe.SkipInit(out Vector256<float> mb2);
Unsafe.SkipInit(out Vector256<float> mb1);
Unsafe.SkipInit(out Vector256<float> mb0);
if (Fma.IsSupported)
{
mb3 = Avx.Add(Fma.MultiplyAdd(my7, C_V_0_2986, mz0), mz2);
mb2 = Avx.Add(Fma.MultiplyAdd(my5, C_V_2_0531, mz1), mz3);
mb1 = Avx.Add(Fma.MultiplyAdd(my3, C_V_3_0727, mz1), mz2);
mb0 = Avx.Add(Fma.MultiplyAdd(my1, C_V_1_5013, mz0), mz3);
}
else
{
mb3 = Avx.Add(Avx.Add(Avx.Multiply(my7, C_V_0_2986), mz0), mz2);
mb2 = Avx.Add(Avx.Add(Avx.Multiply(my5, C_V_2_0531), mz1), mz3);
mb1 = Avx.Add(Avx.Add(Avx.Multiply(my3, C_V_3_0727), mz1), mz2);
mb0 = Avx.Add(Avx.Add(Avx.Multiply(my1, C_V_1_5013), mz0), mz3);
}
Vector256<float> my2 = s.V2;
Vector256<float> my6 = s.V6;
@ -233,17 +213,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
Vector256<float> my4 = s.V4;
mz0 = Avx.Add(my0, my4);
mz1 = Avx.Subtract(my0, my4);
if (Fma.IsSupported)
{
mz2 = Fma.MultiplyAdd(my6, C_V_n1_8477, mz4);
mz3 = Fma.MultiplyAdd(my2, C_V_0_7653, mz4);
}
else
{
mz2 = Avx.Add(Avx.Multiply(my6, C_V_n1_8477), mz4);
mz3 = Avx.Add(Avx.Multiply(my2, C_V_0_7653), mz4);
}
mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my6, C_V_n1_8477);
mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my2, C_V_0_7653);
my0 = Avx.Add(mz0, mz3);
my3 = Avx.Subtract(mz0, mz3);
@ -258,7 +229,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
d.V5 = Avx.Subtract(my2, mb2);
d.V3 = Avx.Add(my3, mb3);
d.V4 = Avx.Subtract(my3, mb3);
}
#endif
}
}
}

19
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
#if SUPPORTS_RUNTIME_INTRINSICS
@ -196,14 +197,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
d.V7R = c0 - c3;
}
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Performs the FDCT internal operations on the given block.
/// Combined operation of <see cref="FDCT8x4_LeftPart(ref Block8x8F, ref Block8x8F)"/> and <see cref="FDCT8x4_RightPart(ref Block8x8F, ref Block8x8F)"/>
/// using AVX instructions.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
private static void FDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
Debug.Assert(Avx.IsSupported, "AVX is required to execute this method");
Vector256<float> t0 = Avx.Add(s.V0, s.V7);
Vector256<float> t7 = Avx.Subtract(s.V0, s.V7);
Vector256<float> t1 = Avx.Add(s.V1, s.V6);
@ -224,36 +228,33 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
Vector256<float> c2 = Avx.Subtract(t1, t2);
// 2 6
d.V2 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(c2, C_V_0_5411), c3, C_V_1_3065);
if (Fma.IsSupported)
{
d.V2 = Fma.MultiplyAdd(c2, C_V_0_5411, Avx.Multiply(c3, C_V_1_3065));
d.V6 = Fma.MultiplySubtract(c3, C_V_0_5411, Avx.Multiply(c2, C_V_1_3065));
}
else
{
d.V2 = Avx.Add(Avx.Multiply(c2, C_V_0_5411), Avx.Multiply(c3, C_V_1_3065));
d.V6 = Avx.Subtract(Avx.Multiply(c3, C_V_0_5411), Avx.Multiply(c2, C_V_1_3065));
}
c3 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(t4, C_V_1_1758), t7, C_V_0_7856);
if (Fma.IsSupported)
{
c3 = Fma.MultiplyAdd(t4, C_V_1_1758, Avx.Multiply(t7, C_V_0_7856));
c0 = Fma.MultiplySubtract(t7, C_V_1_1758, Avx.Multiply(t4, C_V_0_7856));
}
else
{
c3 = Avx.Add(Avx.Multiply(t4, C_V_1_1758), Avx.Multiply(t7, C_V_0_7856));
c0 = Avx.Subtract(Avx.Multiply(t7, C_V_1_1758), Avx.Multiply(t4, C_V_0_7856));
}
c2 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(t5, C_V_1_3870), C_V_0_2758, t6);
if (Fma.IsSupported)
{
c2 = Fma.MultiplyAdd(t5, C_V_1_3870, Avx.Multiply(C_V_0_2758, t6));
c1 = Fma.MultiplySubtract(t6, C_V_1_3870, Avx.Multiply(C_V_0_2758, t5));
}
else
{
c2 = Avx.Add(Avx.Multiply(t5, C_V_1_3870), Avx.Multiply(C_V_0_2758, t6));
c1 = Avx.Subtract(Avx.Multiply(t6, C_V_1_3870), Avx.Multiply(C_V_0_2758, t5));
}
@ -267,8 +268,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
// 1 7
d.V1 = Avx.Add(c0, c3);
d.V7 = Avx.Subtract(c0, c3);
}
#endif
}
/// <summary>
/// Performs 8x8 matrix Forward Discrete Cosine Transform

Loading…
Cancel
Save