Browse Source

Merge pull request #1402 from SixLabors/js/vector4octet-pack

Add AVX2 Vector4Octet.Pack implementation
pull/1407/head
James Jackson-South 5 years ago
committed by GitHub
parent
commit
120080b99f
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 27
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  2. 76
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
  3. 1
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
  4. 4
      tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs

27
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -14,7 +14,32 @@ namespace SixLabors.ImageSharp
{
public static class HwIntrinsics
{
private static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };
public static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };
public static ReadOnlySpan<byte> PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };
/// <summary>
/// Performs a multiplication and an addition of the <see cref="Vector256{T}"/>.
/// </summary>
/// <param name="va">The vector to add to the intermediate result.</param>
/// <param name="vm0">The first vector to multiply.</param>
/// <param name="vm1">The second vector to multiply.</param>
/// <returns>The <see cref="Vector256{T}"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static Vector256<float> MultiplyAdd(
in Vector256<float> va,
in Vector256<float> vm0,
in Vector256<float> vm1)
{
if (Fma.IsSupported)
{
return Fma.MultiplyAdd(vm1, vm0, va);
}
else
{
return Avx.Add(Avx.Multiply(vm0, vm1), va);
}
}
/// <summary>
/// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).

76
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs

@ -1,11 +1,15 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
using SixLabors.ImageSharp.Tuples;
// ReSharper disable ImpureMethodCallOnReadonlyValueField
@ -47,6 +51,73 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
"JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!");
}
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> yBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> cbBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> crBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var chromaOffset = Vector256.Create(-halfValue);
var scale = Vector256.Create(1 / maxValue);
var rCrMult = Vector256.Create(1.402F);
var gCbMult = Vector256.Create(-0.344136F);
var gCrMult = Vector256.Create(-0.714136F);
var bCbMult = Vector256.Create(1.772F);
// Used for packing.
var va = Vector256.Create(1F);
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
Vector256<float> y = Unsafe.Add(ref yBase, i);
Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
y = Avx2.PermuteVar8x32(y, vcontrol);
cb = Avx2.PermuteVar8x32(cb, vcontrol);
cr = Avx2.PermuteVar8x32(cr, vcontrol);
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult);
Vector256<float> g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);
// TODO: We should be savving to RGBA not Vector4
r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale);
g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale);
b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale);
Vector256<float> vte = Avx.UnpackLow(r, b);
Vector256<float> vto = Avx.UnpackLow(g, va);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
vte = Avx.UnpackHigh(r, b);
vto = Avx.UnpackHigh(g, va);
Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
}
#else
ref Vector<float> yBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> cbBase =
@ -104,6 +175,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
}
#endif
}
}
}

1
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs

@ -4,7 +4,6 @@
using System;
using System.Collections.Generic;
using System.Numerics;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.Tuples;

4
tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs

@ -73,7 +73,9 @@ namespace SixLabors.ImageSharp.Benchmarks
}
#endif
this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31)
.WithEnvironmentVariables(new EnvironmentVariable(EnableHWIntrinsic, Off))
.WithEnvironmentVariables(
new EnvironmentVariable(EnableHWIntrinsic, Off),
new EnvironmentVariable(FeatureSIMD, Off))
.WithId("No HwIntrinsics"));
}
}

Loading…
Cancel
Save