Browse Source

Add AVX backed Block8x8F Transpose method

js/color-alpha-handling
James Jackson-South 5 years ago
parent
commit
3091072e38
  1. 6
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs
  2. 6
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt
  3. 83
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
  4. 6
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
  5. 45
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs
  6. 22
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

6
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs

@ -10,12 +10,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
/// <summary>
/// Transpose the block into the destination block.
/// <summary>
/// Fallback method to transpose a block into the destination block on non AVX supported CPUs.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
public void TransposeIntoFallback(ref Block8x8F d)
{
d.V0L.X = V0L.X;
d.V1L.X = V0L.Y;

6
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt

@ -23,12 +23,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
/// <summary>
/// Transpose the block into the destination block.
/// <summary>
/// Fallback method to transpose a block into the destination block on non AVX supported CPUs.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
public void TransposeIntoFallback(ref Block8x8F d)
{
<#
PushIndent(" ");

83
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

@ -6,6 +6,10 @@ using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using System.Text;
// ReSharper disable InconsistentNaming
@ -596,5 +600,84 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
}
/// <summary>
/// Writes the transpose of this block into <paramref name="d"/>.
/// </summary>
/// <remarks>
/// When runtime intrinsics are compiled in and <c>Avx.IsSupported</c> reports true,
/// the work is delegated to <c>TransposeIntoAvx</c>; in every other case
/// <c>TransposeIntoFallback</c> is used.
/// </remarks>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    // Hardware accelerated path: take it and leave early.
    if (Avx.IsSupported)
    {
        this.TransposeIntoAvx(ref d);
        return;
    }

#endif
    // Either intrinsics were not compiled in or the CPU lacks AVX.
    this.TransposeIntoFallback(ref d);
}
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// AVX-only variant for executing <see cref="TransposeInto(ref Block8x8F)"/>.
/// <see href="https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536"/>
/// </summary>
/// <remarks>
/// The eight source rows are loaded by value, so this block is never written to;
/// only <paramref name="d"/> is modified. Because every row is read before the first
/// store to <paramref name="d"/>, the result does not depend on the order of the stores.
/// This method does not check <c>Avx.IsSupported</c> itself; callers are expected to do so.
/// </remarks>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeIntoAvx(ref Block8x8F d)
{
    // Load each row BY VALUE. Using 'ref' locals here would make the shuffle
    // assignments below write their intermediate results back into this block.
    // Each load reinterprets the 32 bytes starting at VnL as one 8-float vector
    // (assumes VnR directly follows VnL in the struct layout).
    Vector256<float> r0 = Unsafe.As<Vector4, Vector256<float>>(ref this.V0L);
    Vector256<float> r1 = Unsafe.As<Vector4, Vector256<float>>(ref this.V1L);
    Vector256<float> r2 = Unsafe.As<Vector4, Vector256<float>>(ref this.V2L);
    Vector256<float> r3 = Unsafe.As<Vector4, Vector256<float>>(ref this.V3L);
    Vector256<float> r4 = Unsafe.As<Vector4, Vector256<float>>(ref this.V4L);
    Vector256<float> r5 = Unsafe.As<Vector4, Vector256<float>>(ref this.V5L);
    Vector256<float> r6 = Unsafe.As<Vector4, Vector256<float>>(ref this.V6L);
    Vector256<float> r7 = Unsafe.As<Vector4, Vector256<float>>(ref this.V7L);

    // Step 1: interleave adjacent row pairs within each 128 bit lane.
    Vector256<float> t0 = Avx.UnpackLow(r0, r1);
    Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
    Vector256<float> t2 = Avx.UnpackLow(r2, r3);
    Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
    Vector256<float> t4 = Avx.UnpackLow(r4, r5);
    Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
    Vector256<float> t6 = Avx.UnpackLow(r6, r7);
    Vector256<float> t7 = Avx.UnpackHigh(r6, r7);

    // Controls generated via
    // _MM_SHUFFLE(fp3, fp2, fp1, fp0) => ((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)
    const byte Control1_0_1_0 = 0b01_00_01_00; // 1, 0, 1, 0
    const byte Control3_2_3_2 = 0b11_10_11_10; // 3, 2, 3, 2

    // Step 2: combine the interleaved pairs; r0..r7 are plain locals and are reused as scratch.
    r0 = Avx.Shuffle(t0, t2, Control1_0_1_0);
    r1 = Avx.Shuffle(t0, t2, Control3_2_3_2);
    r2 = Avx.Shuffle(t1, t3, Control1_0_1_0);
    r3 = Avx.Shuffle(t1, t3, Control3_2_3_2);
    r4 = Avx.Shuffle(t4, t6, Control1_0_1_0);
    r5 = Avx.Shuffle(t4, t6, Control3_2_3_2);
    r6 = Avx.Shuffle(t5, t7, Control1_0_1_0);
    r7 = Avx.Shuffle(t5, t7, Control3_2_3_2);

    // Step 3: exchange 128 bit halves. 0x20 picks the low halves of both operands,
    // 0x31 picks the high halves.
    t0 = Avx.Permute2x128(r0, r4, 0x20);
    t1 = Avx.Permute2x128(r1, r5, 0x20);
    t2 = Avx.Permute2x128(r2, r6, 0x20);
    t3 = Avx.Permute2x128(r3, r7, 0x20);
    t4 = Avx.Permute2x128(r0, r4, 0x31);
    t5 = Avx.Permute2x128(r1, r5, 0x31);
    t6 = Avx.Permute2x128(r2, r6, 0x31);
    t7 = Avx.Permute2x128(r3, r7, 0x31);

    // Store the transposed rows into the destination block.
    Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = t0;
    Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = t1;
    Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = t2;
    Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = t3;
    Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = t4;
    Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = t5;
    Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = t6;
    Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = t7;
}
#endif
}
}

6
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Numerics;
@ -50,8 +50,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <param name="temp">Temporary block provided by the caller</param>
public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp)
{
// TODO: Transpose is a bottleneck now. We need full AVX support to optimize it:
// https://github.com/dotnet/corefx/issues/22940
src.TransposeInto(ref temp);
IDCT8x4_LeftPart(ref temp, ref dest);
@ -340,4 +338,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
dest.MultiplyInplace(C_0_125);
}
}
}
}

45
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs

@ -0,0 +1,45 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
{
    /// <summary>
    /// Benchmarks the available <see cref="Block8x8F"/> transpose implementations
    /// against the same pre-filled source block.
    /// </summary>
    public class Block8x8F_Transpose
    {
        // Built once; every benchmark transposes this block into a fresh destination.
        private static readonly Block8x8F Source = Create8x8FloatData();

        /// <summary>
        /// Measures the fallback (non AVX) transpose.
        /// </summary>
        [Benchmark]
        public void TransposeIntoVector4()
        {
            Block8x8F destination = default(Block8x8F);
            Source.TransposeIntoFallback(ref destination);
        }

#if SUPPORTS_RUNTIME_INTRINSICS
        /// <summary>
        /// Measures the AVX transpose.
        /// </summary>
        [Benchmark]
        public void TransposeIntoAvx()
        {
            Block8x8F destination = default(Block8x8F);
            Source.TransposeIntoAvx(ref destination);
        }
#endif

        /// <summary>
        /// Creates a block whose element at (row, column) holds <c>(row * 10) + column</c>.
        /// </summary>
        /// <returns>The populated block.</returns>
        private static Block8x8F Create8x8FloatData()
        {
            var values = new float[64];

            // Walk the 8x8 grid with a single flat index in row-major order.
            for (int index = 0; index < values.Length; index++)
            {
                int row = index / 8;
                int column = index % 8;
                values[index] = (row * 10) + column;
            }

            Block8x8F block = default(Block8x8F);
            block.LoadFrom(values);
            return block;
        }
    }
}

22
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -172,7 +172,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
source.LoadFrom(Create8x8FloatData());
var dest = default(Block8x8F);
source.TransposeInto(ref dest);
source.TransposeIntoFallback(ref dest);
float[] actual = new float[64];
dest.ScaledCopyTo(actual);
@ -180,6 +180,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Assert.Equal(expected, actual);
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Fact]
public void TransposeIntoAvx()
{
    // SUPPORTS_RUNTIME_INTRINSICS only tells us the intrinsics API was compiled in.
    // The CPU running the test must also support AVX, otherwise the call under test
    // would throw instead of exercising the transpose.
    if (!System.Runtime.Intrinsics.X86.Avx.IsSupported)
    {
        return;
    }

    // Expected result: the reference transpose applied to the same input data.
    float[] expected = Create8x8FloatData();
    ReferenceImplementations.Transpose8x8(expected);

    var source = default(Block8x8F);
    source.LoadFrom(Create8x8FloatData());

    var dest = default(Block8x8F);
    source.TransposeIntoAvx(ref dest);

    // Copy the destination block out to a flat array for comparison.
    float[] actual = new float[64];
    dest.ScaledCopyTo(actual);

    Assert.Equal(expected, actual);
}
#endif
private class BufferHolder
{
public Block8x8F Buffer;

Loading…
Cancel
Save