Browse Source

Merge pull request #1374 from SixLabors/js/Block8x8F_TransposeAVX

Add Avx backed Block8x8F Transpose method
pull/1383/head
James Jackson-South 5 years ago
committed by GitHub
parent
commit
a1784a67bf
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 6
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs
  2. 6
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt
  3. 97
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
  4. 6
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
  5. 45
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs
  6. 24
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

6
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs

@ -10,12 +10,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
/// <summary>
/// Transpose the block into the destination block.
/// <summary>
/// Fallback method to transpose a block into the destination block on non AVX supported CPUs.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
public void TransposeIntoFallback(ref Block8x8F d)
{
d.V0L.X = V0L.X;
d.V1L.X = V0L.Y;

6
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt

@ -23,12 +23,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
/// <summary>
/// Transpose the block into the destination block.
/// <summary>
/// Fallback method to transpose a block into the destination block on non AVX supported CPUs.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
public void TransposeIntoFallback(ref Block8x8F d)
{
<#
PushIndent(" ");

97
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

@ -6,6 +6,10 @@ using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using System.Text;
// ReSharper disable InconsistentNaming
@ -596,5 +600,98 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
}
/// <summary>
/// Transpose the block into the destination block.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
this.TransposeIntoAvx(ref d);
}
else
#endif
{
this.TransposeIntoFallback(ref d);
}
}
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// AVX-only variant for executing <see cref="TransposeInto(ref Block8x8F)"/>.
/// <see href="https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536"/>
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeIntoAvx(ref Block8x8F d)
{
Vector256<float> r0 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V0L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V4L),
1);
Vector256<float> r1 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V1L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V5L),
1);
Vector256<float> r2 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V2L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V6L),
1);
Vector256<float> r3 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V3L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V7L),
1);
Vector256<float> r4 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V4R),
1);
Vector256<float> r5 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V5R),
1);
Vector256<float> r6 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V6R),
1);
Vector256<float> r7 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V7R),
1);
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
Vector256<float> t2 = Avx.UnpackLow(r2, r3);
Vector256<float> v = Avx.Shuffle(t0, t2, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = Avx.Blend(t0, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = Avx.Blend(t2, v, 0x33);
Vector256<float> t4 = Avx.UnpackLow(r4, r5);
Vector256<float> t6 = Avx.UnpackLow(r6, r7);
v = Avx.Shuffle(t4, t6, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = Avx.Blend(t4, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = Avx.Blend(t6, v, 0x33);
Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
v = Avx.Shuffle(t1, t3, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = Avx.Blend(t1, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = Avx.Blend(t3, v, 0x33);
Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
v = Avx.Shuffle(t5, t7, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = Avx.Blend(t5, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = Avx.Blend(t7, v, 0x33);
}
#endif
}
}

6
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Numerics;
@ -50,8 +50,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <param name="temp">Temporary block provided by the caller</param>
public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp)
{
// TODO: Transpose is a bottleneck now. We need full AVX support to optimize it:
// https://github.com/dotnet/corefx/issues/22940
src.TransposeInto(ref temp);
IDCT8x4_LeftPart(ref temp, ref dest);
@ -340,4 +338,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
dest.MultiplyInplace(C_0_125);
}
}
}
}

45
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs

@ -0,0 +1,45 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
{
public class Block8x8F_Transpose
{
private static readonly Block8x8F Source = Create8x8FloatData();
[Benchmark(Baseline=true)]
public void TransposeIntoVector4()
{
var dest = default(Block8x8F);
Source.TransposeIntoFallback(ref dest);
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Benchmark]
public void TransposeIntoAvx()
{
var dest = default(Block8x8F);
Source.TransposeIntoAvx(ref dest);
}
#endif
private static Block8x8F Create8x8FloatData()
{
var result = new float[64];
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < 8; j++)
{
result[(i * 8) + j] = (i * 10) + j;
}
}
var source = default(Block8x8F);
source.LoadFrom(result);
return source;
}
}
}

24
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -163,7 +163,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
}
[Fact]
public void TransposeInto()
public void TransposeIntoFallback()
{
float[] expected = Create8x8FloatData();
ReferenceImplementations.Transpose8x8(expected);
@ -172,7 +172,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
source.LoadFrom(Create8x8FloatData());
var dest = default(Block8x8F);
source.TransposeInto(ref dest);
source.TransposeIntoFallback(ref dest);
float[] actual = new float[64];
dest.ScaledCopyTo(actual);
@ -180,6 +180,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Assert.Equal(expected, actual);
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Fact]
public void TransposeIntoAvx()
{
float[] expected = Create8x8FloatData();
ReferenceImplementations.Transpose8x8(expected);
var source = default(Block8x8F);
source.LoadFrom(Create8x8FloatData());
var dest = default(Block8x8F);
source.TransposeIntoAvx(ref dest);
float[] actual = new float[64];
dest.ScaledCopyTo(actual);
Assert.Equal(expected, actual);
}
#endif
private class BufferHolder
{
public Block8x8F Buffer;

Loading…
Cancel
Save