Browse Source

Use HW color conversion

js/color-alpha-handling
James Jackson-South 6 years ago
parent
commit
b8081fd3e9
  1. 23
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  2. 74
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
  3. 156
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
  4. 40
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs

23
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -16,6 +16,29 @@ namespace SixLabors.ImageSharp
{
/// <summary>
/// Gets the 32 byte control mask that callers reinterpret as a <see cref="Vector256{T}"/> of <see cref="int"/>
/// and pass to <c>Avx2.PermuteVar8x32</c>. Read as eight little-endian 32 bit integers the
/// mask contains the lane indices 0, 4, 1, 5, 2, 6, 3, 7.
/// </summary>
public static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };
/// <summary>
/// Computes <c>(vm0 * vm1) + va</c> element-wise over the supplied <see cref="Vector256{T}"/> values.
/// </summary>
/// <remarks>
/// When <see cref="Fma"/> is supported a single fused multiply-add is issued;
/// otherwise the product is formed with an AVX multiply and then added to <paramref name="va"/>.
/// </remarks>
/// <param name="va">The vector added to the product.</param>
/// <param name="vm0">The first factor of the product.</param>
/// <param name="vm1">The second factor of the product.</param>
/// <returns>The <see cref="Vector256{T}"/> containing the result.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static Vector256<float> MultiplyAdd(
    in Vector256<float> va,
    in Vector256<float> vm0,
    in Vector256<float> vm1)
{
    if (!Fma.IsSupported)
    {
        // No fused instruction available: multiply first, then add.
        Vector256<float> product = Avx.Multiply(vm0, vm1);
        return Avx.Add(product, va);
    }

    return Fma.MultiplyAdd(vm1, vm0, va);
}
/// <summary>
/// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>

74
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs

@ -1,11 +1,15 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
using SixLabors.ImageSharp.Tuples;
// ReSharper disable ImpureMethodCallOnReadonlyValueField
@ -47,6 +51,71 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
"JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!");
}
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> yBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> cbBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> crBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var chromaOffset = Vector256.Create(-halfValue);
var scale = Vector256.Create(1 / maxValue);
var rCrMult = Vector256.Create(1.402F);
var gCbMult = Vector256.Create(0.344136F);
var gCrMult = Vector256.Create(0.714136F);
var bCbMult = Vector256.Create(1.772F);
// Used for packing.
Vector4 vo = Vector4.One;
Vector128<float> valpha = Unsafe.As<Vector4, Vector128<float>>(ref vo);
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskDeinterleave8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
Vector4Pair rr = default;
Vector4Pair gg = default;
Vector4Pair bb = default;
ref Vector256<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector256<float>>(ref rr);
ref Vector256<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector256<float>>(ref gg);
ref Vector256<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector256<float>>(ref bb);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
Vector256<float> y = Unsafe.Add(ref yBase, i);
Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult);
Vector256<float> g = Avx.Subtract(Avx.Subtract(y, Avx.Multiply(cb, gCbMult)), Avx.Multiply(cr, gCrMult));
Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);
r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale);
g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale);
b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale);
rrRefAsVector = r;
ggRefAsVector = g;
bbRefAsVector = b;
// Collect (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.PackAvx2(ref rr, ref gg, ref bb, in valpha, in vcontrol);
}
#else
ref Vector<float> yBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> cbBase =
@ -104,6 +173,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
}
#endif
}
}
}

156
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs

@ -190,95 +190,97 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
#pragma warning disable SA1132 // Do not combine fields
public Vector4 V0, V1, V2, V3, V4, V5, V6, V7;
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ...
/// </summary>
public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b)
[MethodImpl(InliningOptions.ShortMethod)]
public void PackAvx2(
ref Vector4Pair r,
ref Vector4Pair g,
ref Vector4Pair b,
in Vector128<float> a,
in Vector256<int> vcontrol)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
Vector4 vo = Vector4.One;
Vector128<float> valpha = Unsafe.As<Vector4, Vector128<float>>(ref vo);
Vector256<float> r0 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector256<float>>(ref r.A),
Unsafe.As<Vector4, Vector128<float>>(ref g.A),
1);
ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskDeinterleave8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
Vector256<float> r1 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector256<float>>(ref b.A),
a,
1);
Vector256<float> r0 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref r.A).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref g.A),
1);
Vector256<float> r2 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref r.B).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref g.B),
1);
Vector256<float> r1 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref b.A).ToVector256(),
valpha,
1);
Vector256<float> r3 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref b.B).ToVector256(),
a,
1);
Vector256<float> r2 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref r.B).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref g.B),
1);
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
Vector256<float> t2 = Avx.UnpackHigh(r0, r1);
Vector256<float> r3 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref b.B).ToVector256(),
valpha,
1);
Unsafe.As<Vector4, Vector256<float>>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol);
Unsafe.As<Vector4, Vector256<float>>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol);
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
Vector256<float> t2 = Avx.UnpackHigh(r0, r1);
Vector256<float> t4 = Avx.UnpackLow(r2, r3);
Vector256<float> t6 = Avx.UnpackHigh(r2, r3);
Unsafe.As<Vector4, Vector256<float>>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol);
Unsafe.As<Vector4, Vector256<float>>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol);
Vector256<float> t4 = Avx.UnpackLow(r2, r3);
Vector256<float> t6 = Avx.UnpackHigh(r2, r3);
Unsafe.As<Vector4, Vector256<float>>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol);
Unsafe.As<Vector4, Vector256<float>>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol);
}
else
Unsafe.As<Vector4, Vector256<float>>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol);
Unsafe.As<Vector4, Vector256<float>>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol);
}
#endif
{
this.V0.X = r.A.X;
this.V0.Y = g.A.X;
this.V0.Z = b.A.X;
this.V0.W = 1f;
this.V1.X = r.A.Y;
this.V1.Y = g.A.Y;
this.V1.Z = b.A.Y;
this.V1.W = 1f;
this.V2.X = r.A.Z;
this.V2.Y = g.A.Z;
this.V2.Z = b.A.Z;
this.V2.W = 1f;
this.V3.X = r.A.W;
this.V3.Y = g.A.W;
this.V3.Z = b.A.W;
this.V3.W = 1f;
this.V4.X = r.B.X;
this.V4.Y = g.B.X;
this.V4.Z = b.B.X;
this.V4.W = 1f;
this.V5.X = r.B.Y;
this.V5.Y = g.B.Y;
this.V5.Z = b.B.Y;
this.V5.W = 1f;
this.V6.X = r.B.Z;
this.V6.Y = g.B.Z;
this.V6.Z = b.B.Z;
this.V6.W = 1f;
this.V7.X = r.B.W;
this.V7.Y = g.B.W;
this.V7.Z = b.B.W;
this.V7.W = 1f;
}
/// <summary>
/// Interleaves the planar (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) values into
/// eight vectors laid out as (r0,g0,b0,1), (r1,g1,b1,1) ... stored in V0 through V7.
/// </summary>
/// <param name="r">The eight red values, four in each half of the pair.</param>
/// <param name="g">The eight green values, four in each half of the pair.</param>
/// <param name="b">The eight blue values, four in each half of the pair.</param>
public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b)
{
    // The first four outputs are built from the A halves of each input pair.
    this.V0 = new Vector4(r.A.X, g.A.X, b.A.X, 1f);
    this.V1 = new Vector4(r.A.Y, g.A.Y, b.A.Y, 1f);
    this.V2 = new Vector4(r.A.Z, g.A.Z, b.A.Z, 1f);
    this.V3 = new Vector4(r.A.W, g.A.W, b.A.W, 1f);

    // The remaining four outputs are built from the B halves.
    this.V4 = new Vector4(r.B.X, g.B.X, b.B.X, 1f);
    this.V5 = new Vector4(r.B.Y, g.B.Y, b.B.Y, 1f);
    this.V6 = new Vector4(r.B.Z, g.B.Z, b.B.Z, 1f);
    this.V7 = new Vector4(r.B.W, g.B.W, b.B.W, 1f);
}
}
}

40
tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs

@ -1,40 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Numerics;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Tuples;
using static SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters.JpegColorConverter;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
    /// <summary>
    /// Benchmarks <c>Vector4Octet.Pack</c> using three fixed input pairs.
    /// </summary>
    [Config(typeof(Config.HwIntrinsics_SSE_AVX))]
    public class Vector4OctetPack
    {
        // These fields are passed by reference to Pack, so they cannot be readonly.
        private static Vector4Pair redSource = CreateSequentialPair(1);

        private static Vector4Pair greenSource = CreateSequentialPair(9);

        private static Vector4Pair blueSource = CreateSequentialPair(17);

        /// <summary>
        /// Packs the three fixed input pairs into a fresh <see cref="Vector4Octet"/>.
        /// </summary>
        [Benchmark]
        public void Pack()
        {
            var octet = default(Vector4Octet);
            octet.Pack(ref redSource, ref greenSource, ref blueSource);
        }

        /// <summary>
        /// Builds a pair holding eight consecutive values starting at <paramref name="first"/>:
        /// the first four in A and the next four in B.
        /// </summary>
        private static Vector4Pair CreateSequentialPair(float first)
        {
            Vector4Pair pair = default;
            pair.A = new Vector4(first, first + 1, first + 2, first + 3);
            pair.B = new Vector4(first + 4, first + 5, first + 6, first + 7);
            return pair;
        }
    }
}
Loading…
Cancel
Save