Browse Source

Refactor JpegColorConverters

js/color-alpha-handling
Nicolas Portmann 5 years ago
parent
commit
e550caaed7
  1. 12
      src/ImageSharp/Common/Helpers/SimdUtils.cs
  2. 22
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs
  3. 18
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs
  4. 81
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs
  5. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs
  6. 145
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs
  7. 71
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs
  8. 63
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs
  9. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs
  10. 109
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs
  11. 53
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs
  12. 72
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs
  13. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs
  14. 132
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs
  15. 67
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs
  16. 101
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs
  17. 4
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs
  18. 183
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
  19. 30
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs
  20. 87
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs
  21. 110
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs
  22. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs
  23. 193
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs
  24. 91
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs
  25. 18
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs
  26. 46
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs
  27. 99
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
  28. 14
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs
  29. 68
      tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs

12
src/ImageSharp/Common/Helpers/SimdUtils.cs

@ -25,6 +25,18 @@ namespace SixLabors.ImageSharp
public static bool HasVector8 { get; } =
    Vector.IsHardwareAccelerated // System.Numerics vectors must be hardware accelerated
    && Vector<float>.Count == 8 // a float vector must hold exactly 8 lanes
    && Vector<int>.Count == 8; // and so must an int vector
/// <summary>
/// Gets a value indicating whether AVX2 hardware intrinsics can be used.
/// Evaluates to <c>Avx2.IsSupported</c> when compiled with SUPPORTS_RUNTIME_INTRINSICS,
/// and is always <see langword="false"/> otherwise.
/// </summary>
public static bool HasAvx2 =>
#if SUPPORTS_RUNTIME_INTRINSICS
    Avx2.IsSupported;
#else
    false;
#endif
/// <summary>
/// Transform all scalars in 'v' in a way that converting them to <see cref="int"/> would have rounding semantics.
/// </summary>

22
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs

@ -0,0 +1,22 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Base class for the converters that are only available when <see cref="SimdUtils.HasAvx2"/> is true.
        /// </summary>
        internal abstract class Avx2JpegColorConverter : VectorizedJpegColorConverter
        {
            // Third argument handed to the VectorizedJpegColorConverter constructor.
            // NOTE(review): presumably the number of floats handled per vector step
            // (a 256-bit register holds 8 floats) - confirm against VectorizedJpegColorConverter.
            private const int FloatsPerAvxRegister = 8;

            protected Avx2JpegColorConverter(JpegColorSpace colorSpace, int precision)
                : base(colorSpace, precision, FloatsPerAvxRegister)
            {
            }

            /// <summary>
            /// Gets a value indicating whether this converter can be used; mirrors <see cref="SimdUtils.HasAvx2"/>.
            /// </summary>
            protected sealed override bool IsAvailable
            {
                get { return SimdUtils.HasAvx2; }
            }
        }
    }
}

18
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs

@ -0,0 +1,18 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Base class for the converters that report themselves as always available.
        /// </summary>
        internal abstract class BasicJpegColorConverter : JpegColorConverter
        {
            protected BasicJpegColorConverter(JpegColorSpace colorSpace, int precision)
                : base(colorSpace, precision)
            {
            }

            /// <summary>
            /// Gets a value indicating whether this converter can be used; always <see langword="true"/>.
            /// </summary>
            protected override bool IsAvailable
            {
                get { return true; }
            }
        }
    }
}

81
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs

@ -0,0 +1,81 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// CMYK converter whose vectorized path is written with AVX/AVX2 hardware intrinsics.
        /// </summary>
        internal sealed class FromCmykAvx2 : Avx2JpegColorConverter
        {
            public FromCmykAvx2(int precision)
                : base(JpegColorSpace.Cmyk, precision)
            {
            }

            /// <summary>
            /// Converts the planar C, M, Y, K components in <paramref name="values"/> to interleaved
            /// <see cref="Vector4"/> pixels, 8 pixels per iteration. Only <c>result.Length / 8</c> whole
            /// groups are processed here. The body compiles to nothing when SUPPORTS_RUNTIME_INTRINSICS
            /// is not defined.
            /// </summary>
            /// <param name="values">The source component planes (Component0..Component3 = C, M, Y, K).</param>
            /// <param name="result">The destination pixel buffer.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
#if SUPPORTS_RUNTIME_INTRINSICS
                // View every component plane as a sequence of 8-float vectors.
                ref Vector256<float> cyanPlane =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> magentaPlane =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> yellowPlane =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector256<float> keyPlane =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));

                // One Vector256<float> covers two Vector4 pixels of the output.
                ref Vector256<float> pixelPairs =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Constants for the color math: reciprocal of the maximum sample value and the W component.
                Vector256<float> invMax = Vector256.Create(1 / this.MaximumValue);
                Vector256<float> alpha = Vector256.Create(1F);

                // Lane permutation applied to every input vector before interleaving.
                Vector256<int> laneOrder =
                    Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32));

                int groupCount = result.Length / 8;
                for (int group = 0; group < groupCount; group++)
                {
                    // k' = k * (1 / max); each color channel becomes channel * k' * (1 / max).
                    Vector256<float> key =
                        Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref keyPlane, group), laneOrder), invMax);
                    Vector256<float> cyan =
                        Avx.Multiply(Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref cyanPlane, group), laneOrder), key), invMax);
                    Vector256<float> magenta =
                        Avx.Multiply(Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref magentaPlane, group), laneOrder), key), invMax);
                    Vector256<float> yellow =
                        Avx.Multiply(Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref yellowPlane, group), laneOrder), key), invMax);

                    // Interleave (c, m) and (y, 1) pairs, then stitch them into (c, m, y, 1) pixels.
                    Vector256<float> cyanMagentaLow = Avx.UnpackLow(cyan, magenta);
                    Vector256<float> yellowAlphaLow = Avx.UnpackLow(yellow, alpha);
                    Vector256<float> cyanMagentaHigh = Avx.UnpackHigh(cyan, magenta);
                    Vector256<float> yellowAlphaHigh = Avx.UnpackHigh(yellow, alpha);

                    // Each group of 8 pixels occupies 4 consecutive 256-bit slots in the output.
                    int slot = group * 4;
                    Unsafe.Add(ref pixelPairs, slot) = Avx.Shuffle(cyanMagentaLow, yellowAlphaLow, 0b01_00_01_00);
                    Unsafe.Add(ref pixelPairs, slot + 1) = Avx.Shuffle(cyanMagentaLow, yellowAlphaLow, 0b11_10_11_10);
                    Unsafe.Add(ref pixelPairs, slot + 2) = Avx.Shuffle(cyanMagentaHigh, yellowAlphaHigh, 0b01_00_01_00);
                    Unsafe.Add(ref pixelPairs, slot + 3) = Avx.Shuffle(cyanMagentaHigh, yellowAlphaHigh, 0b11_10_11_10);
                }
#endif
            }

            /// <summary>
            /// Scalar conversion path; forwards to <see cref="FromCmykBasic"/>.
            /// </summary>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
            }
        }
    }
}

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs

@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromCmykBasic : JpegColorConverter
internal sealed class FromCmykBasic : BasicJpegColorConverter
{
public FromCmykBasic(int precision)
: base(JpegColorSpace.Cmyk, precision)

145
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs

@ -1,145 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#else
using SixLabors.ImageSharp.Tuples;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// CMYK converter that processes 8 pixels at a time, using AVX2 intrinsics when
        /// SUPPORTS_RUNTIME_INTRINSICS is defined and <see cref="Vector{T}"/> otherwise.
        /// </summary>
        internal sealed class FromCmykVector8 : JpegColorConverter
        {
            public FromCmykVector8(int precision)
                : base(JpegColorSpace.Cmyk, precision)
            {
            }

            /// <summary>
            /// Gets a value indicating whether the 8-wide vector path can be used on this machine.
            /// </summary>
            public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;

            /// <summary>
            /// Converts the largest multiple-of-8 prefix with the vectorized path and the remaining
            /// (at most 7) elements with <see cref="FromCmykBasic"/>.
            /// </summary>
            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
                int remainder = result.Length % 8;
                int simdCount = result.Length - remainder;
                if (simdCount > 0)
                {
                    ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue);
                }

                FromCmykBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue);
            }

            /// <summary>
            /// Vectorized CMYK conversion of <c>result.Length / 8</c> groups of 8 pixels.
            /// </summary>
            /// <param name="values">The source component planes (Component0..Component3 = C, M, Y, K).</param>
            /// <param name="result">The destination pixel buffer.</param>
            /// <param name="maxValue">The maximum sample value used to normalize the components.</param>
            /// <exception cref="InvalidOperationException">Thrown when <see cref="IsAvailable"/> is false.</exception>
            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
            {
                // This implementation is actually AVX specific.
                // An AVX register is capable of storing 8 float-s.
                if (!IsAvailable)
                {
                    // Fixed: the message previously named FromGrayscaleVector8 (copy-paste) and
                    // described the registers as "256 byte" wide; AVX registers are 256 bits wide.
                    throw new InvalidOperationException(
                        "JpegColorConverter.FromCmykVector8 can be used only on architecture having 256 bit floating point SIMD registers!");
                }

#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> cBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> mBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> yBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector256<float> kBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));

                ref Vector256<float> resultBase =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Used for the color conversion
                var scale = Vector256.Create(1 / maxValue);
                var one = Vector256.Create(1F);

                // Used for packing
                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

                int n = result.Length / 8;
                for (int i = 0; i < n; i++)
                {
                    Vector256<float> k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol);
                    Vector256<float> c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol);
                    Vector256<float> m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol);
                    Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol);

                    // k is normalized first; each color channel is then channel * k * (1 / maxValue).
                    k = Avx.Multiply(k, scale);
                    c = Avx.Multiply(Avx.Multiply(c, k), scale);
                    m = Avx.Multiply(Avx.Multiply(m, k), scale);
                    y = Avx.Multiply(Avx.Multiply(y, k), scale);

                    Vector256<float> cmLo = Avx.UnpackLow(c, m);
                    Vector256<float> yoLo = Avx.UnpackLow(y, one);
                    Vector256<float> cmHi = Avx.UnpackHigh(c, m);
                    Vector256<float> yoHi = Avx.UnpackHigh(y, one);

                    // Every 8 input pixels fill 4 consecutive 256-bit slots (2 pixels per slot).
                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

                    destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00);
                    Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10);
                    Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00);
                    Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10);
                }
#else
                ref Vector<float> cBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> mBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector<float> yBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector<float> kBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));

                ref Vector4Octet resultBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                Vector4Pair cc = default;
                Vector4Pair mm = default;
                Vector4Pair yy = default;

                ref Vector<float> ccRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref cc);
                ref Vector<float> mmRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref mm);
                ref Vector<float> yyRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref yy);

                var scale = new Vector<float>(1 / maxValue);

                // Walking 8 elements at one step:
                int n = result.Length / 8;

                for (int i = 0; i < n; i++)
                {
                    Vector<float> c = Unsafe.Add(ref cBase, i);
                    Vector<float> m = Unsafe.Add(ref mBase, i);
                    Vector<float> y = Unsafe.Add(ref yBase, i);
                    Vector<float> k = Unsafe.Add(ref kBase, i) * scale;

                    c = (c * k) * scale;
                    m = (m * k) * scale;
                    y = (y * k) * scale;

                    ccRefAsVector = c;
                    mmRefAsVector = m;
                    yyRefAsVector = y;

                    // Collect the (c0..c7), (m0..m7), (y0..y7) vector values into the intended
                    // (c0,m0,y0,1), (c1,m1,y1,1) ... pixel order (see Vector4Octet.Pack).
                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
                    destination.Pack(ref cc, ref mm, ref yy);
                }
#endif
            }
        }
    }
}

71
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs

@ -0,0 +1,71 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// CMYK converter whose vectorized path is written against <see cref="Vector{T}"/>,
        /// walking the component planes 8 floats at a time.
        /// </summary>
        internal sealed class FromCmykVector8 : Vector8JpegColorConverter
        {
            public FromCmykVector8(int precision)
                : base(JpegColorSpace.Cmyk, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of 8 pixels from planar C, M, Y, K to
            /// interleaved <see cref="Vector4"/> pixels.
            /// </summary>
            /// <param name="values">The source component planes (Component0..Component3 = C, M, Y, K).</param>
            /// <param name="result">The destination pixel buffer.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
                // View every component plane as a sequence of Vector<float> values.
                ref Vector<float> cyanPlane =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> magentaPlane =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector<float> yellowPlane =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector<float> keyPlane =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));

                // The output is addressed in groups of 8 pixels.
                ref Vector4Octet octets =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Scratch pairs that are written through Vector<float> views and then handed to Pack.
                Vector4Pair cyanPair = default;
                Vector4Pair magentaPair = default;
                Vector4Pair yellowPair = default;
                ref Vector<float> cyanView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref cyanPair);
                ref Vector<float> magentaView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref magentaPair);
                ref Vector<float> yellowView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref yellowPair);

                Vector<float> invMax = new Vector<float>(1 / this.MaximumValue);

                int groupCount = result.Length / 8;
                for (int group = 0; group < groupCount; group++)
                {
                    // k' = k * (1 / max); each color channel becomes channel * k' * (1 / max).
                    Vector<float> key = Unsafe.Add(ref keyPlane, group) * invMax;

                    cyanView = (Unsafe.Add(ref cyanPlane, group) * key) * invMax;
                    magentaView = (Unsafe.Add(ref magentaPlane, group) * key) * invMax;
                    yellowView = (Unsafe.Add(ref yellowPlane, group) * key) * invMax;

                    // Collect the (c0..c7), (m0..m7), (y0..y7) vector values into the intended
                    // (c0,m0,y0,1), (c1,m1,y1,1) ... pixel order (see Vector4Octet.Pack).
                    Unsafe.Add(ref octets, group).Pack(ref cyanPair, ref magentaPair, ref yellowPair);
                }
            }

            /// <summary>
            /// Scalar conversion path; forwards to <see cref="FromCmykBasic"/>.
            /// </summary>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
            }
        }
    }
}

63
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs

@ -0,0 +1,63 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Grayscale converter whose vectorized path is written with AVX/AVX2 hardware intrinsics.
        /// </summary>
        internal sealed class FromGrayscaleAvx2 : Avx2JpegColorConverter
        {
            public FromGrayscaleAvx2(int precision)
                : base(JpegColorSpace.Grayscale, precision)
            {
            }

            /// <summary>
            /// Expands the single gray plane in <paramref name="values"/> to <see cref="Vector4"/> pixels,
            /// 8 pixels per iteration. Only <c>result.Length / 8</c> whole groups are processed here.
            /// The body compiles to nothing when SUPPORTS_RUNTIME_INTRINSICS is not defined.
            /// </summary>
            /// <param name="values">The source component planes (only Component0 is read).</param>
            /// <param name="result">The destination pixel buffer.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> grayPlane =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));

                // One Vector256<float> covers two Vector4 pixels of the output.
                ref Vector256<float> pixelPairs =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Constants for the color math: reciprocal of the maximum sample value and the W component.
                Vector256<float> invMax = Vector256.Create(1 / this.MaximumValue);
                Vector256<float> alpha = Vector256.Create(1F);

                // Lane permutation applied to the input vector before it is broadcast.
                Vector256<int> laneOrder =
                    Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32));

                int groupCount = result.Length / 8;
                for (int group = 0; group < groupCount; group++)
                {
                    Vector256<float> gray =
                        Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref grayPlane, group), laneOrder), invMax);

                    // Each store broadcasts one element per 128-bit half and blends 1 into
                    // elements 3 and 7 (mask 0b1000_1000), i.e. the W of both pixels.
                    int slot = group * 4;
                    Unsafe.Add(ref pixelPairs, slot) = Avx.Blend(Avx.Permute(gray, 0b00_00_00_00), alpha, 0b1000_1000);
                    Unsafe.Add(ref pixelPairs, slot + 1) = Avx.Blend(Avx.Permute(gray, 0b01_01_01_01), alpha, 0b1000_1000);
                    Unsafe.Add(ref pixelPairs, slot + 2) = Avx.Blend(Avx.Permute(gray, 0b10_10_10_10), alpha, 0b1000_1000);
                    Unsafe.Add(ref pixelPairs, slot + 3) = Avx.Blend(Avx.Permute(gray, 0b11_11_11_11), alpha, 0b1000_1000);
                }
#endif
            }

            /// <summary>
            /// Scalar conversion path; forwards to <see cref="FromGrayscaleBasic"/>.
            /// </summary>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue);
            }
        }
    }
}

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs

@ -10,7 +10,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromGrayscaleBasic : JpegColorConverter
internal sealed class FromGrayscaleBasic : BasicJpegColorConverter
{
public FromGrayscaleBasic(int precision)
: base(JpegColorSpace.Grayscale, precision)

109
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs

@ -1,109 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#else
using SixLabors.ImageSharp.Tuples;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Grayscale converter that processes 8 pixels at a time, using AVX2 intrinsics when
        /// SUPPORTS_RUNTIME_INTRINSICS is defined and <see cref="Vector{T}"/> otherwise.
        /// </summary>
        internal sealed class FromGrayscaleVector8 : JpegColorConverter
        {
            public FromGrayscaleVector8(int precision)
                : base(JpegColorSpace.Grayscale, precision)
            {
            }

            /// <summary>
            /// Gets a value indicating whether the 8-wide vector path can be used on this machine.
            /// </summary>
            public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;

            /// <summary>
            /// Converts the largest multiple-of-8 prefix with the vectorized path and the remaining
            /// (at most 7) elements with <see cref="FromGrayscaleBasic"/>.
            /// </summary>
            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
                int remainder = result.Length % 8;
                int simdCount = result.Length - remainder;
                if (simdCount > 0)
                {
                    ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue);
                }

                FromGrayscaleBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue);
            }

            /// <summary>
            /// Vectorized grayscale conversion of <c>result.Length / 8</c> groups of 8 pixels.
            /// </summary>
            /// <param name="values">The source component planes (only Component0 is read).</param>
            /// <param name="result">The destination pixel buffer.</param>
            /// <param name="maxValue">The maximum sample value used to normalize the component.</param>
            /// <exception cref="InvalidOperationException">Thrown when <see cref="IsAvailable"/> is false.</exception>
            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
            {
                // This implementation is actually AVX specific.
                // An AVX register is capable of storing 8 float-s.
                if (!IsAvailable)
                {
                    // Fixed: the message previously described the registers as "256 byte" wide;
                    // AVX registers are 256 bits wide.
                    throw new InvalidOperationException(
                        "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 bit floating point SIMD registers!");
                }

#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> gBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));

                ref Vector256<float> resultBase =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Used for the color conversion
                var scale = Vector256.Create(1 / maxValue);
                var one = Vector256.Create(1F);

                // Used for packing
                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

                int n = result.Length / 8;
                for (int i = 0; i < n; i++)
                {
                    Vector256<float> g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol);

                    g = Avx.Multiply(g, scale);

                    // Every 8 input pixels fill 4 consecutive 256-bit slots (2 pixels per slot).
                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

                    // Blend mask 0b1000_1000 takes elements 3 and 7 (the W of both pixels) from 'one'.
                    destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000);
                    Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000);
                    Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000);
                    Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000);
                }
#else
                ref Vector<float> gBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));

                ref Vector4Octet resultBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                Vector4Pair gg = default;
                ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);

                var scale = new Vector<float>(1 / maxValue);

                // Walking 8 elements at one step:
                int n = result.Length / 8;

                for (int i = 0; i < n; i++)
                {
                    Vector<float> g = Unsafe.Add(ref gBase, i);
                    g *= scale;

                    ggRefAsVector = g;

                    // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order:
                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
                    destination.Pack(ref gg);
                }
#endif
            }
        }
    }
}

53
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs

@ -0,0 +1,53 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Grayscale converter whose vectorized path is written against <see cref="Vector{T}"/>,
        /// walking the gray plane 8 floats at a time.
        /// </summary>
        internal sealed class FromGrayscaleVector8 : Vector8JpegColorConverter
        {
            public FromGrayscaleVector8(int precision)
                : base(JpegColorSpace.Grayscale, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of 8 gray samples to <see cref="Vector4"/> pixels.
            /// </summary>
            /// <param name="values">The source component planes (only Component0 is read).</param>
            /// <param name="result">The destination pixel buffer.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
                ref Vector<float> grayPlane =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));

                // The output is addressed in groups of 8 pixels.
                ref Vector4Octet octets =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Scratch pair that is written through a Vector<float> view and then handed to Pack.
                Vector4Pair grayPair = default;
                ref Vector<float> grayView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref grayPair);

                Vector<float> invMax = new Vector<float>(1 / this.MaximumValue);

                int groupCount = result.Length / 8;
                for (int group = 0; group < groupCount; group++)
                {
                    // Normalize the 8 samples by the maximum sample value.
                    grayView = Unsafe.Add(ref grayPlane, group) * invMax;

                    // Collect the (g0..g7) vector values into the intended
                    // (g0,g0,g0,1), (g1,g1,g1,1) ... pixel order (see Vector4Octet.Pack).
                    Unsafe.Add(ref octets, group).Pack(ref grayPair);
                }
            }

            /// <summary>
            /// Scalar conversion path; forwards to <see cref="FromGrayscaleBasic"/>.
            /// </summary>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue);
            }
        }
    }
}

72
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs

@ -0,0 +1,72 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// RGB converter whose vectorized path is written with AVX/AVX2 hardware intrinsics.
        /// </summary>
        internal sealed class FromRgbAvx2 : Avx2JpegColorConverter
        {
            public FromRgbAvx2(int precision)
                : base(JpegColorSpace.RGB, precision)
            {
            }

            /// <summary>
            /// Converts the planar R, G, B components in <paramref name="values"/> to interleaved
            /// <see cref="Vector4"/> pixels, 8 pixels per iteration. Only <c>result.Length / 8</c> whole
            /// groups are processed here. The body compiles to nothing when SUPPORTS_RUNTIME_INTRINSICS
            /// is not defined.
            /// </summary>
            /// <param name="values">The source component planes (Component0..Component2 = R, G, B).</param>
            /// <param name="result">The destination pixel buffer.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
#if SUPPORTS_RUNTIME_INTRINSICS
                // View every component plane as a sequence of 8-float vectors.
                ref Vector256<float> redPlane =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> greenPlane =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> bluePlane =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));

                // One Vector256<float> covers two Vector4 pixels of the output.
                ref Vector256<float> pixelPairs =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Constants for the color math: reciprocal of the maximum sample value and the W component.
                Vector256<float> invMax = Vector256.Create(1 / this.MaximumValue);
                Vector256<float> alpha = Vector256.Create(1F);

                // Lane permutation applied to every input vector before interleaving.
                Vector256<int> laneOrder =
                    Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32));

                int groupCount = result.Length / 8;
                for (int group = 0; group < groupCount; group++)
                {
                    // Permute, then normalize each channel by the maximum sample value.
                    Vector256<float> red = Avx2.PermuteVar8x32(Unsafe.Add(ref redPlane, group), laneOrder);
                    Vector256<float> green = Avx2.PermuteVar8x32(Unsafe.Add(ref greenPlane, group), laneOrder);
                    Vector256<float> blue = Avx2.PermuteVar8x32(Unsafe.Add(ref bluePlane, group), laneOrder);
                    red = Avx.Multiply(red, invMax);
                    green = Avx.Multiply(green, invMax);
                    blue = Avx.Multiply(blue, invMax);

                    // Interleave (r, g) and (b, 1) pairs, then stitch them into (r, g, b, 1) pixels.
                    Vector256<float> redGreenLow = Avx.UnpackLow(red, green);
                    Vector256<float> blueAlphaLow = Avx.UnpackLow(blue, alpha);
                    Vector256<float> redGreenHigh = Avx.UnpackHigh(red, green);
                    Vector256<float> blueAlphaHigh = Avx.UnpackHigh(blue, alpha);

                    // Each group of 8 pixels occupies 4 consecutive 256-bit slots in the output.
                    int slot = group * 4;
                    Unsafe.Add(ref pixelPairs, slot) = Avx.Shuffle(redGreenLow, blueAlphaLow, 0b01_00_01_00);
                    Unsafe.Add(ref pixelPairs, slot + 1) = Avx.Shuffle(redGreenLow, blueAlphaLow, 0b11_10_11_10);
                    Unsafe.Add(ref pixelPairs, slot + 2) = Avx.Shuffle(redGreenHigh, blueAlphaHigh, 0b01_00_01_00);
                    Unsafe.Add(ref pixelPairs, slot + 3) = Avx.Shuffle(redGreenHigh, blueAlphaHigh, 0b11_10_11_10);
                }
#endif
            }

            /// <summary>
            /// Scalar conversion path; forwards to <see cref="FromRgbBasic"/>.
            /// </summary>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromRgbBasic.ConvertCore(values, result, this.MaximumValue);
            }
        }
    }
}

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs

@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromRgbBasic : JpegColorConverter
internal sealed class FromRgbBasic : BasicJpegColorConverter
{
public FromRgbBasic(int precision)
: base(JpegColorSpace.RGB, precision)

132
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs

@ -1,132 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#else
using SixLabors.ImageSharp.Tuples;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// RGB converter that processes 8 pixels at a time, using AVX2 intrinsics when
        /// SUPPORTS_RUNTIME_INTRINSICS is defined and <see cref="Vector{T}"/> otherwise.
        /// </summary>
        internal sealed class FromRgbVector8 : JpegColorConverter
        {
            public FromRgbVector8(int precision)
                : base(JpegColorSpace.RGB, precision)
            {
            }

            /// <summary>
            /// Gets a value indicating whether the 8-wide vector path can be used on this machine.
            /// </summary>
            public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;

            /// <summary>
            /// Converts the largest multiple-of-8 prefix with the vectorized path and the remaining
            /// (at most 7) elements with <see cref="FromRgbBasic"/>.
            /// </summary>
            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
                int remainder = result.Length % 8;
                int simdCount = result.Length - remainder;
                if (simdCount > 0)
                {
                    ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue);
                }

                FromRgbBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue);
            }

            /// <summary>
            /// Vectorized RGB conversion of <c>result.Length / 8</c> groups of 8 pixels.
            /// </summary>
            /// <param name="values">The source component planes (Component0..Component2 = R, G, B).</param>
            /// <param name="result">The destination pixel buffer.</param>
            /// <param name="maxValue">The maximum sample value used to normalize the components.</param>
            /// <exception cref="InvalidOperationException">Thrown when <see cref="IsAvailable"/> is false.</exception>
            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
            {
                // This implementation is actually AVX specific.
                // An AVX register is capable of storing 8 float-s.
                if (!IsAvailable)
                {
                    // Fixed: the message previously named FromGrayscaleVector8 (copy-paste) and
                    // described the registers as "256 byte" wide; AVX registers are 256 bits wide.
                    throw new InvalidOperationException(
                        "JpegColorConverter.FromRgbVector8 can be used only on architecture having 256 bit floating point SIMD registers!");
                }

#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> rBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> gBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> bBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));

                ref Vector256<float> resultBase =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Used for the color conversion
                var scale = Vector256.Create(1 / maxValue);
                var one = Vector256.Create(1F);

                // Used for packing
                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

                int n = result.Length / 8;
                for (int i = 0; i < n; i++)
                {
                    Vector256<float> r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale);
                    Vector256<float> g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale);
                    Vector256<float> b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale);

                    Vector256<float> rgLo = Avx.UnpackLow(r, g);
                    Vector256<float> boLo = Avx.UnpackLow(b, one);
                    Vector256<float> rgHi = Avx.UnpackHigh(r, g);
                    Vector256<float> boHi = Avx.UnpackHigh(b, one);

                    // Every 8 input pixels fill 4 consecutive 256-bit slots (2 pixels per slot).
                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

                    destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00);
                    Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10);
                    Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00);
                    Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10);
                }
#else
                ref Vector<float> rBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> gBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector<float> bBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));

                ref Vector4Octet resultBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                Vector4Pair rr = default;
                Vector4Pair gg = default;
                Vector4Pair bb = default;

                ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
                ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
                ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);

                var scale = new Vector<float>(1 / maxValue);

                // Walking 8 elements at one step:
                int n = result.Length / 8;

                for (int i = 0; i < n; i++)
                {
                    Vector<float> r = Unsafe.Add(ref rBase, i);
                    Vector<float> g = Unsafe.Add(ref gBase, i);
                    Vector<float> b = Unsafe.Add(ref bBase, i);

                    r *= scale;
                    g *= scale;
                    b *= scale;

                    rrRefAsVector = r;
                    ggRefAsVector = g;
                    bbRefAsVector = b;

                    // Collect the (r0..r7), (g0..g7), (b0..b7) vector values into the intended
                    // (r0,g0,b0,1), (r1,g1,b1,1) ... pixel order (see Vector4Octet.Pack).
                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
                    destination.Pack(ref rr, ref gg, ref bb);
                }
#endif
            }
        }
    }
}

67
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs

@ -0,0 +1,67 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
/// <summary>
/// RGB converter whose vectorized path is written against <see cref="Vector{T}"/> of float,
/// handling 8 pixels per iteration.
/// </summary>
internal sealed class FromRgbVector8 : Vector8JpegColorConverter
{
    public FromRgbVector8(int precision)
        : base(JpegColorSpace.RGB, precision)
    {
    }

    /// <summary>
    /// Scales the three planar components by 1 / MaximumValue and packs them into <paramref name="result"/>.
    /// Only whole groups of 8 pixels (result.Length / 8) are processed here.
    /// </summary>
    /// <param name="values">Planar input; Component0, Component1 and Component2 are read as R, G and B.</param>
    /// <param name="result">Destination pixels.</param>
    protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
    {
        // Reinterpret the planar float buffers as sequences of Vector<float>.
        ref Vector<float> redPlane =
            ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
        ref Vector<float> greenPlane =
            ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
        ref Vector<float> bluePlane =
            ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));

        // The destination is addressed in groups of 8 Vector4 pixels.
        ref Vector4Octet packedBase =
            ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

        // Scratch storage handed to Pack(); each pair is also viewed as one Vector<float>
        // so a single assignment fills it (assumes Vector<float>.Count == 8 - see Vector8JpegColorConverter).
        Vector4Pair redPair = default;
        Vector4Pair greenPair = default;
        Vector4Pair bluePair = default;
        ref Vector<float> redLanes = ref Unsafe.As<Vector4Pair, Vector<float>>(ref redPair);
        ref Vector<float> greenLanes = ref Unsafe.As<Vector4Pair, Vector<float>>(ref greenPair);
        ref Vector<float> blueLanes = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bluePair);

        var normalize = new Vector<float>(1 / this.MaximumValue);

        int groupCount = result.Length / 8;
        for (int group = 0; group < groupCount; group++)
        {
            redLanes = Unsafe.Add(ref redPlane, group) * normalize;
            greenLanes = Unsafe.Add(ref greenPlane, group) * normalize;
            blueLanes = Unsafe.Add(ref bluePlane, group) * normalize;

            // Turn the planar (r0..r7) (g0..g7) (b0..b7) values into interleaved
            // (r0,g0,b0,1), (r1,g1,b1,1) ... pixels (as Pack is expected to do - TODO confirm alpha handling).
            ref Vector4Octet packed = ref Unsafe.Add(ref packedBase, group);
            packed.Pack(ref redPair, ref greenPair, ref bluePair);
        }
    }

    /// <summary>
    /// Non-vectorized conversion; forwards to <see cref="FromRgbBasic"/>.
    /// </summary>
    protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
    {
        FromRgbBasic.ConvertCore(values, result, this.MaximumValue);
    }
}
}
}

101
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs

@ -0,0 +1,101 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
// ReSharper disable ImpureMethodCallOnReadonlyValueField
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
/// <summary>
/// YCbCr converter whose vectorized path uses AVX/AVX2 hardware intrinsics,
/// handling 8 pixels per iteration.
/// </summary>
internal sealed class FromYCbCrAvx2 : Avx2JpegColorConverter
{
    public FromYCbCrAvx2(int precision)
        : base(JpegColorSpace.YCbCr, precision)
    {
    }

    /// <summary>
    /// Converts whole groups of 8 pixels (result.Length / 8) from planar Y/Cb/Cr to interleaved RGBA.
    /// The body is compiled only when SUPPORTS_RUNTIME_INTRINSICS is defined; otherwise it is empty.
    /// </summary>
    /// <param name="values">Planar input; Component0, Component1 and Component2 are read as Y, Cb and Cr.</param>
    /// <param name="result">Destination pixels.</param>
    protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
    {
#if SUPPORTS_RUNTIME_INTRINSICS
        ref Vector256<float> lumaPlane =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
        ref Vector256<float> cbPlane =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
        ref Vector256<float> crPlane =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));

        // Each Vector256<float> of the destination holds two Vector4 pixels.
        ref Vector256<float> pixelBase =
            ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

        // Conversion constants.
        Vector256<float> chromaBias = Vector256.Create(-this.HalfValue);
        Vector256<float> normalize = Vector256.Create(1 / this.MaximumValue);
        Vector256<float> redFromCr = Vector256.Create(1.402F);
        Vector256<float> greenFromCb = Vector256.Create(-0.344136F);
        Vector256<float> greenFromCr = Vector256.Create(-0.714136F);
        Vector256<float> blueFromCb = Vector256.Create(1.772F);

        // Packing constants: the fourth channel of every pixel is 1, and the inputs are
        // permuted up front (mask name suggests even/odd ordering) so the per-128-bit-lane
        // unpacks below write pixels in source order.
        Vector256<float> opaque = Vector256.Create(1F);
        Vector256<int> laneOrder =
            Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32));

        int groupCount = result.Length / 8;
        for (int group = 0; group < groupCount; group++)
        {
            // y  = yVals[i];
            // cb = cbVals[i] - HalfValue;
            // cr = crVals[i] - HalfValue;
            Vector256<float> luma = Avx2.PermuteVar8x32(Unsafe.Add(ref lumaPlane, group), laneOrder);
            Vector256<float> cb = Avx2.PermuteVar8x32(Avx.Add(Unsafe.Add(ref cbPlane, group), chromaBias), laneOrder);
            Vector256<float> cr = Avx2.PermuteVar8x32(Avx.Add(Unsafe.Add(ref crPlane, group), chromaBias), laneOrder);

            // r = y + (1.402F * cr);
            // g = y - (0.344136F * cb) - (0.714136F * cr);
            // b = y + (1.772F * cb);
            Vector256<float> red = HwIntrinsics.MultiplyAdd(luma, cr, redFromCr);
            Vector256<float> green = HwIntrinsics.MultiplyAdd(
                HwIntrinsics.MultiplyAdd(luma, cb, greenFromCb),
                cr,
                greenFromCr);
            Vector256<float> blue = HwIntrinsics.MultiplyAdd(luma, cb, blueFromCb);

            // Round to whole sample values, then scale by 1 / MaximumValue.
            // TODO: We should be saving to RGBA not Vector4
            red = Avx.Multiply(Avx.RoundToNearestInteger(red), normalize);
            green = Avx.Multiply(Avx.RoundToNearestInteger(green), normalize);
            blue = Avx.Multiply(Avx.RoundToNearestInteger(blue), normalize);

            // Interleave (r,b) with (g,1) to obtain r,g,b,1 quadruples.
            Vector256<float> redBlueLow = Avx.UnpackLow(red, blue);
            Vector256<float> greenAlphaLow = Avx.UnpackLow(green, opaque);
            Vector256<float> redBlueHigh = Avx.UnpackHigh(red, blue);
            Vector256<float> greenAlphaHigh = Avx.UnpackHigh(green, opaque);

            // 8 pixels occupy 4 consecutive Vector256<float> slots.
            ref Vector256<float> target = ref Unsafe.Add(ref pixelBase, group * 4);
            target = Avx.UnpackLow(redBlueLow, greenAlphaLow);
            Unsafe.Add(ref target, 1) = Avx.UnpackHigh(redBlueLow, greenAlphaLow);
            Unsafe.Add(ref target, 2) = Avx.UnpackLow(redBlueHigh, greenAlphaHigh);
            Unsafe.Add(ref target, 3) = Avx.UnpackHigh(redBlueHigh, greenAlphaHigh);
        }
#endif
    }

    /// <summary>
    /// Non-vectorized conversion; forwards to <see cref="FromYCbCrBasic"/>.
    /// </summary>
    protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
    {
        FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
    }
}
}
}

4
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs

@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYCbCrBasic : JpegColorConverter
internal sealed class FromYCbCrBasic : BasicJpegColorConverter
{
public FromYCbCrBasic(int precision)
: base(JpegColorSpace.YCbCr, precision)
@ -48,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
}
}
}
}
}

183
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs

@ -1,183 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#else
using SixLabors.ImageSharp.Tuples;
#endif
// ReSharper disable ImpureMethodCallOnReadonlyValueField
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
/// <summary>
/// YCbCr converter that processes 8 pixels per step, using AVX2 intrinsics when
/// SUPPORTS_RUNTIME_INTRINSICS is defined and <see cref="Vector{T}"/> otherwise.
/// </summary>
internal sealed class FromYCbCrSimdVector8 : JpegColorConverter
{
public FromYCbCrSimdVector8(int precision)
: base(JpegColorSpace.YCbCr, precision)
{
}
/// <summary>
/// Gets a value indicating whether the 8-wide SIMD path may be used on the current machine.
/// </summary>
public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;
/// <summary>
/// Converts the leading multiple-of-8 portion with the SIMD path and the remaining
/// (result.Length % 8) pixels with <see cref="FromYCbCrBasic"/>.
/// </summary>
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
int remainder = result.Length % 8;
int simdCount = result.Length - remainder;
if (simdCount > 0)
{
ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue);
}
// The scalar fallback is invoked even when remainder is 0 (with empty slices).
FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue);
}
/// <summary>
/// SIMD convert using buffers of sizes divisible by 8.
/// </summary>
/// <param name="values">Planar input; Component0, Component1 and Component2 are read as Y, Cb and Cr.</param>
/// <param name="result">Destination pixels; only result.Length / 8 whole groups are written.</param>
/// <param name="maxValue">Divisor used to scale the rounded channel values (multiplied in as 1 / maxValue).</param>
/// <param name="halfValue">Offset subtracted from Cb and Cr before conversion.</param>
/// <exception cref="InvalidOperationException">Thrown when <see cref="IsAvailable"/> is false.</exception>
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
{
// This implementation is actually AVX specific.
// An AVX register is capable of storing 8 float-s.
// NOTE(review): the message below says "256 byte" registers and names "FromYCbCrSimd256";
// AVX registers are 256 bits wide and this class is FromYCbCrSimdVector8 - consider rewording.
if (!IsAvailable)
{
throw new InvalidOperationException(
"JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!");
}
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> yBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> cbBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> crBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
// Each Vector256<float> of the destination holds two Vector4 pixels.
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var chromaOffset = Vector256.Create(-halfValue);
var scale = Vector256.Create(1 / maxValue);
var rCrMult = Vector256.Create(1.402F);
var gCbMult = Vector256.Create(-0.344136F);
var gCrMult = Vector256.Create(-0.714136F);
var bCbMult = Vector256.Create(1.772F);
// Used for packing.
// va supplies the constant 1 written to the fourth channel of every pixel.
var va = Vector256.Create(1F);
// The inputs are permuted up front (mask name suggests even/odd ordering) so that the
// per-128-bit-lane unpacks below emit pixels in source order.
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - halfValue;
// cr = crVals[i] - halfValue;
Vector256<float> y = Unsafe.Add(ref yBase, i);
Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
y = Avx2.PermuteVar8x32(y, vcontrol);
cb = Avx2.PermuteVar8x32(cb, vcontrol);
cr = Avx2.PermuteVar8x32(cr, vcontrol);
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult);
Vector256<float> g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);
// TODO: We should be saving to RGBA not Vector4
// Round to whole sample values, then scale by 1 / maxValue.
r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale);
g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale);
b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale);
// Interleave (r,b) with (g,1) to obtain r,g,b,1 quadruples; 8 pixels fill 4 destination vectors.
Vector256<float> vte = Avx.UnpackLow(r, b);
Vector256<float> vto = Avx.UnpackLow(g, va);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
vte = Avx.UnpackHigh(r, b);
vto = Avx.UnpackHigh(g, va);
Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
}
#else
ref Vector<float> yBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> cbBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> crBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
// The destination is addressed in groups of 8 Vector4 pixels.
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
var chromaOffset = new Vector<float>(-halfValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
// Scratch storage handed to Pack(); each pair is also viewed as one Vector<float>
// so a single assignment fills it (assumes Vector<float>.Count == 8, guarded by IsAvailable).
Vector4Pair rr = default;
Vector4Pair gg = default;
Vector4Pair bb = default;
ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
var scale = new Vector<float>(1 / maxValue);
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - halfValue;
// cr = crVals[i] - halfValue;
Vector<float> y = Unsafe.Add(ref yBase, i);
Vector<float> cb = Unsafe.Add(ref cbBase, i) + chromaOffset;
Vector<float> cr = Unsafe.Add(ref crBase, i) + chromaOffset;
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector<float> r = y + (cr * new Vector<float>(1.402F));
Vector<float> g = y - (cb * new Vector<float>(0.344136F)) - (cr * new Vector<float>(0.714136F));
Vector<float> b = y + (cb * new Vector<float>(1.772F));
r = r.FastRound();
g = g.FastRound();
b = b.FastRound();
r *= scale;
g *= scale;
b *= scale;
rrRefAsVector = r;
ggRefAsVector = g;
bbRefAsVector = b;
// Collect (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
}
#endif
}
}
}
}

30
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs → src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs

@ -5,36 +5,22 @@ using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYCbCrSimd : JpegColorConverter
internal sealed class FromYCbCrVector : VectorizedJpegColorConverter
{
public FromYCbCrSimd(int precision)
: base(JpegColorSpace.YCbCr, precision)
public FromYCbCrVector(int precision)
: base(JpegColorSpace.YCbCr, precision, 8)
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
int remainder = result.Length % 8;
int simdCount = result.Length - remainder;
if (simdCount > 0)
{
ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue);
}
protected override bool IsAvailable => true;
FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue);
}
/// <summary>
/// SIMD convert using buffers of sizes divisible by 8.
/// </summary>
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
DebugGuard.IsTrue(result.Length % 8 == 0, nameof(result), "result.Length should be divisible by 8!");
@ -48,7 +34,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
var chromaOffset = new Vector4(-halfValue);
var chromaOffset = new Vector4(-this.HalfValue);
var maxValue = this.MaximumValue;
// Walking 8 elements at one step:
int n = result.Length / 8;
@ -112,6 +99,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
destination.Pack(ref r, ref g, ref b);
}
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
}
}
}

87
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs

@ -0,0 +1,87 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
// ReSharper disable ImpureMethodCallOnReadonlyValueField
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
/// <summary>
/// YCbCr converter whose vectorized path is written against <see cref="Vector{T}"/> of float,
/// handling 8 pixels per iteration.
/// </summary>
internal sealed class FromYCbCrVector8 : Vector8JpegColorConverter
{
    public FromYCbCrVector8(int precision)
        : base(JpegColorSpace.YCbCr, precision)
    {
    }

    /// <summary>
    /// Converts whole groups of 8 pixels (result.Length / 8) from planar Y/Cb/Cr to interleaved RGBA.
    /// </summary>
    /// <param name="values">Planar input; Component0, Component1 and Component2 are read as Y, Cb and Cr.</param>
    /// <param name="result">Destination pixels.</param>
    protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
    {
        ref Vector<float> yBase =
            ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
        ref Vector<float> cbBase =
            ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
        ref Vector<float> crBase =
            ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));

        // The destination is addressed in groups of 8 Vector4 pixels.
        ref Vector4Octet resultBase =
            ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

        // Loop-invariant constants, broadcast once instead of on every iteration.
        var chromaOffset = new Vector<float>(-this.HalfValue);
        var scale = new Vector<float>(1 / this.MaximumValue);
        var rCrMult = new Vector<float>(1.402F);
        var gCbMult = new Vector<float>(0.344136F);
        var gCrMult = new Vector<float>(0.714136F);
        var bCbMult = new Vector<float>(1.772F);

        // Scratch storage handed to Pack(); each pair is also viewed as one Vector<float>
        // so a single assignment fills it (assumes Vector<float>.Count == 8 - see Vector8JpegColorConverter).
        Vector4Pair rr = default;
        Vector4Pair gg = default;
        Vector4Pair bb = default;
        ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
        ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
        ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);

        // Walking 8 elements at one step:
        int n = result.Length / 8;
        for (int i = 0; i < n; i++)
        {
            // y  = yVals[i];
            // cb = cbVals[i] - HalfValue;
            // cr = crVals[i] - HalfValue;
            Vector<float> y = Unsafe.Add(ref yBase, i);
            Vector<float> cb = Unsafe.Add(ref cbBase, i) + chromaOffset;
            Vector<float> cr = Unsafe.Add(ref crBase, i) + chromaOffset;

            // r = y + (1.402F * cr);
            // g = y - (0.344136F * cb) - (0.714136F * cr);
            // b = y + (1.772F * cb);
            // Adding & multiplying 8 elements at one time:
            Vector<float> r = y + (cr * rCrMult);
            Vector<float> g = y - (cb * gCbMult) - (cr * gCrMult);
            Vector<float> b = y + (cb * bCbMult);

            // Round to whole sample values, then scale by 1 / MaximumValue.
            r = r.FastRound();
            g = g.FastRound();
            b = b.FastRound();
            r *= scale;
            g *= scale;
            b *= scale;

            rrRefAsVector = r;
            ggRefAsVector = g;
            bbRefAsVector = b;

            // Collect (r0..r7) (g0..g7) (b0..b7) vector values in the expected
            // (r0,g0,b0,1), (r1,g1,b1,1) ... order:
            ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
            destination.Pack(ref rr, ref gg, ref bb);
        }
    }

    /// <summary>
    /// Non-vectorized conversion; forwards to <see cref="FromYCbCrBasic"/>.
    /// </summary>
    protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
        FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
}
}
}

110
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs

@ -0,0 +1,110 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
/// <summary>
/// YccK converter whose vectorized path uses AVX/AVX2 hardware intrinsics,
/// handling 8 pixels per iteration.
/// </summary>
internal sealed class FromYccKAvx2 : Avx2JpegColorConverter
{
    public FromYccKAvx2(int precision)
        : base(JpegColorSpace.Ycck, precision)
    {
    }

    /// <summary>
    /// Converts whole groups of 8 pixels (result.Length / 8) from planar Y/Cb/Cr/K to interleaved RGBA.
    /// The body is compiled only when SUPPORTS_RUNTIME_INTRINSICS is defined; otherwise it is empty.
    /// </summary>
    /// <param name="values">Planar input; Component0..Component3 are read as Y, Cb, Cr and K.</param>
    /// <param name="result">Destination pixels.</param>
    protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
    {
#if SUPPORTS_RUNTIME_INTRINSICS
        ref Vector256<float> lumaPlane =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
        ref Vector256<float> cbPlane =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
        ref Vector256<float> crPlane =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
        ref Vector256<float> blackPlane =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));

        // Each Vector256<float> of the destination holds two Vector4 pixels.
        ref Vector256<float> pixelBase =
            ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

        // Conversion constants.
        Vector256<float> chromaBias = Vector256.Create(-this.HalfValue);
        Vector256<float> normalize = Vector256.Create(1 / this.MaximumValue);
        Vector256<float> ceiling = Vector256.Create(this.MaximumValue);
        Vector256<float> redFromCr = Vector256.Create(1.402F);
        Vector256<float> greenFromCb = Vector256.Create(-0.344136F);
        Vector256<float> greenFromCr = Vector256.Create(-0.714136F);
        Vector256<float> blueFromCb = Vector256.Create(1.772F);

        // Packing constants: the fourth channel of every pixel is 1, and the inputs are
        // permuted up front (mask name suggests even/odd ordering) so the per-128-bit-lane
        // unpacks below write pixels in source order.
        Vector256<float> opaque = Vector256.Create(1F);
        Vector256<int> laneOrder =
            Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32));

        int groupCount = result.Length / 8;
        for (int group = 0; group < groupCount; group++)
        {
            // y  = yVals[i];
            // cb = cbVals[i] - HalfValue;
            // cr = crVals[i] - HalfValue;
            // k  = kVals[i] / MaximumValue;
            Vector256<float> luma = Avx2.PermuteVar8x32(Unsafe.Add(ref lumaPlane, group), laneOrder);
            Vector256<float> cb = Avx2.PermuteVar8x32(Avx.Add(Unsafe.Add(ref cbPlane, group), chromaBias), laneOrder);
            Vector256<float> cr = Avx2.PermuteVar8x32(Avx.Add(Unsafe.Add(ref crPlane, group), chromaBias), laneOrder);
            Vector256<float> black = Avx2.PermuteVar8x32(Avx.Divide(Unsafe.Add(ref blackPlane, group), ceiling), laneOrder);

            // r = y + (1.402F * cr);
            // g = y - (0.344136F * cb) - (0.714136F * cr);
            // b = y + (1.772F * cb);
            Vector256<float> red = HwIntrinsics.MultiplyAdd(luma, cr, redFromCr);
            Vector256<float> green = HwIntrinsics.MultiplyAdd(
                HwIntrinsics.MultiplyAdd(luma, cb, greenFromCb),
                cr,
                greenFromCr);
            Vector256<float> blue = HwIntrinsics.MultiplyAdd(luma, cb, blueFromCb);

            // Invert the rounded channels against MaximumValue ...
            red = Avx.Subtract(ceiling, Avx.RoundToNearestInteger(red));
            green = Avx.Subtract(ceiling, Avx.RoundToNearestInteger(green));
            blue = Avx.Subtract(ceiling, Avx.RoundToNearestInteger(blue));

            // ... then apply K and scale by 1 / MaximumValue.
            red = Avx.Multiply(Avx.Multiply(red, black), normalize);
            green = Avx.Multiply(Avx.Multiply(green, black), normalize);
            blue = Avx.Multiply(Avx.Multiply(blue, black), normalize);

            // Interleave (r,b) with (g,1) to obtain r,g,b,1 quadruples.
            Vector256<float> redBlueLow = Avx.UnpackLow(red, blue);
            Vector256<float> greenAlphaLow = Avx.UnpackLow(green, opaque);
            Vector256<float> redBlueHigh = Avx.UnpackHigh(red, blue);
            Vector256<float> greenAlphaHigh = Avx.UnpackHigh(green, opaque);

            // 8 pixels occupy 4 consecutive Vector256<float> slots.
            ref Vector256<float> target = ref Unsafe.Add(ref pixelBase, group * 4);
            target = Avx.UnpackLow(redBlueLow, greenAlphaLow);
            Unsafe.Add(ref target, 1) = Avx.UnpackHigh(redBlueLow, greenAlphaLow);
            Unsafe.Add(ref target, 2) = Avx.UnpackLow(redBlueHigh, greenAlphaHigh);
            Unsafe.Add(ref target, 3) = Avx.UnpackHigh(redBlueHigh, greenAlphaHigh);
        }
#endif
    }

    /// <summary>
    /// Non-vectorized conversion; forwards to <see cref="FromYccKBasic"/>.
    /// </summary>
    protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
    {
        FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
    }
}
}
}

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs

@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYccKBasic : JpegColorConverter
internal sealed class FromYccKBasic : BasicJpegColorConverter
{
public FromYccKBasic(int precision)
: base(JpegColorSpace.Ycck, precision)

193
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs

@ -1,193 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#else
using SixLabors.ImageSharp.Tuples;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
/// <summary>
/// YccK converter that processes 8 pixels per step, using AVX2 intrinsics when
/// SUPPORTS_RUNTIME_INTRINSICS is defined and <see cref="Vector{T}"/> otherwise.
/// </summary>
internal sealed class FromYccKVector8 : JpegColorConverter
{
public FromYccKVector8(int precision)
: base(JpegColorSpace.Ycck, precision)
{
}
/// <summary>
/// Gets a value indicating whether the 8-wide SIMD path may be used on the current machine.
/// </summary>
public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;
/// <summary>
/// Converts the leading multiple-of-8 portion with the SIMD path and the remaining
/// (result.Length % 8) pixels with <see cref="FromYccKBasic"/>.
/// </summary>
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
int remainder = result.Length % 8;
int simdCount = result.Length - remainder;
if (simdCount > 0)
{
ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue);
}
// The scalar fallback is invoked even when remainder is 0 (with empty slices).
FromYccKBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue);
}
/// <summary>
/// SIMD convert using buffers of sizes divisible by 8.
/// </summary>
/// <param name="values">Planar input; Component0..Component3 are read as Y, Cb, Cr and K.</param>
/// <param name="result">Destination pixels; only result.Length / 8 whole groups are written.</param>
/// <param name="maxValue">Value the channels are inverted against; also the divisor for K and for the final scaling.</param>
/// <param name="halfValue">Offset subtracted from Cb and Cr before conversion.</param>
/// <exception cref="InvalidOperationException">Thrown when <see cref="IsAvailable"/> is false.</exception>
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
{
// This implementation is actually AVX specific.
// An AVX register is capable of storing 8 float-s.
// NOTE(review): the message below names "FromYCbCrSimd256" although this is the YccK converter,
// and says "256 byte" registers where AVX registers are 256 bits wide - consider rewording.
if (!IsAvailable)
{
throw new InvalidOperationException(
"JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!");
}
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> yBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> cbBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> crBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> kBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));
// Each Vector256<float> of the destination holds two Vector4 pixels.
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var chromaOffset = Vector256.Create(-halfValue);
var scale = Vector256.Create(1 / maxValue);
var max = Vector256.Create(maxValue);
var rCrMult = Vector256.Create(1.402F);
var gCbMult = Vector256.Create(-0.344136F);
var gCrMult = Vector256.Create(-0.714136F);
var bCbMult = Vector256.Create(1.772F);
// Used for packing.
// va supplies the constant 1 written to the fourth channel of every pixel.
var va = Vector256.Create(1F);
// The inputs are permuted up front (mask name suggests even/odd ordering) so that the
// per-128-bit-lane unpacks below emit pixels in source order.
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - halfValue;
// cr = crVals[i] - halfValue;
// k = kVals[i] / maxValue;
Vector256<float> y = Unsafe.Add(ref yBase, i);
Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
Vector256<float> k = Avx.Divide(Unsafe.Add(ref kBase, i), max);
y = Avx2.PermuteVar8x32(y, vcontrol);
cb = Avx2.PermuteVar8x32(cb, vcontrol);
cr = Avx2.PermuteVar8x32(cr, vcontrol);
k = Avx2.PermuteVar8x32(k, vcontrol);
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult);
Vector256<float> g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);
// Invert the rounded channels against maxValue ...
r = Avx.Subtract(max, Avx.RoundToNearestInteger(r));
g = Avx.Subtract(max, Avx.RoundToNearestInteger(g));
b = Avx.Subtract(max, Avx.RoundToNearestInteger(b));
// ... then apply k and scale by 1 / maxValue.
r = Avx.Multiply(Avx.Multiply(r, k), scale);
g = Avx.Multiply(Avx.Multiply(g, k), scale);
b = Avx.Multiply(Avx.Multiply(b, k), scale);
// Interleave (r,b) with (g,1) to obtain r,g,b,1 quadruples; 8 pixels fill 4 destination vectors.
Vector256<float> vte = Avx.UnpackLow(r, b);
Vector256<float> vto = Avx.UnpackLow(g, va);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
vte = Avx.UnpackHigh(r, b);
vto = Avx.UnpackHigh(g, va);
Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
}
#else
ref Vector<float> yBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> cbBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> crBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector<float> kBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));
// The destination is addressed in groups of 8 Vector4 pixels.
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
var chromaOffset = new Vector<float>(-halfValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
// Scratch storage handed to Pack(); each pair is also viewed as one Vector<float>
// so a single assignment fills it (assumes Vector<float>.Count == 8, guarded by IsAvailable).
Vector4Pair rr = default;
Vector4Pair gg = default;
Vector4Pair bb = default;
ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
var scale = new Vector<float>(1 / maxValue);
var max = new Vector<float>(maxValue);
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - halfValue;
// cr = crVals[i] - halfValue;
// k = kVals[i] / maxValue;
Vector<float> y = Unsafe.Add(ref yBase, i);
Vector<float> cb = Unsafe.Add(ref cbBase, i) + chromaOffset;
Vector<float> cr = Unsafe.Add(ref crBase, i) + chromaOffset;
Vector<float> k = Unsafe.Add(ref kBase, i) / max;
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector<float> r = y + (cr * new Vector<float>(1.402F));
Vector<float> g = y - (cb * new Vector<float>(0.344136F)) - (cr * new Vector<float>(0.714136F));
Vector<float> b = y + (cb * new Vector<float>(1.772F));
// Invert the rounded channels against maxValue and apply k.
r = (max - r.FastRound()) * k;
g = (max - g.FastRound()) * k;
b = (max - b.FastRound()) * k;
r *= scale;
g *= scale;
b *= scale;
rrRefAsVector = r;
ggRefAsVector = g;
bbRefAsVector = b;
// Collect (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
}
#endif
}
}
}
}

91
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs

@ -0,0 +1,91 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYccKVector8 : Vector8JpegColorConverter
{
public FromYccKVector8(int precision)
: base(JpegColorSpace.Ycck, precision)
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
ref Vector<float> yBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> cbBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> crBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector<float> kBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
var chromaOffset = new Vector<float>(-this.HalfValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
Vector4Pair rr = default;
Vector4Pair gg = default;
Vector4Pair bb = default;
ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
var scale = new Vector<float>(1 / this.MaximumValue);
var max = new Vector<float>(this.MaximumValue);
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
// k = kVals[i] / 256F;
Vector<float> y = Unsafe.Add(ref yBase, i);
Vector<float> cb = Unsafe.Add(ref cbBase, i) + chromaOffset;
Vector<float> cr = Unsafe.Add(ref crBase, i) + chromaOffset;
Vector<float> k = Unsafe.Add(ref kBase, i) / max;
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector<float> r = y + (cr * new Vector<float>(1.402F));
Vector<float> g = y - (cb * new Vector<float>(0.344136F)) - (cr * new Vector<float>(0.714136F));
Vector<float> b = y + (cb * new Vector<float>(1.772F));
r = (max - r.FastRound()) * k;
g = (max - g.FastRound()) * k;
b = (max - b.FastRound()) * k;
r *= scale;
g *= scale;
b *= scale;
rrRefAsVector = r;
ggRefAsVector = g;
bbRefAsVector = b;
// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
}
}
/// <summary>
/// Non-vectorized conversion: forwards to the static <see cref="FromYccKBasic"/> routine,
/// supplying this converter's <c>MaximumValue</c> and <c>HalfValue</c>.
/// </summary>
/// <param name="values">The input component values.</param>
/// <param name="result">The destination buffer for the converted values.</param>
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
{
    FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
}
}
}
}

18
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs

@ -0,0 +1,18 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
/// <summary>
/// Base class for vectorized converters that pass a vector size of 8 to
/// <see cref="VectorizedJpegColorConverter"/> and report their availability
/// through <see cref="SimdUtils.HasVector8"/>.
/// </summary>
internal abstract class Vector8JpegColorConverter : VectorizedJpegColorConverter
{
    // Vector size handed to the vectorized base class.
    private const int Vector8Size = 8;

    /// <summary>
    /// Initializes a new instance of the <see cref="Vector8JpegColorConverter"/> class.
    /// </summary>
    /// <param name="colorSpace">The color space, forwarded to the base class.</param>
    /// <param name="precision">The precision, forwarded to the base class.</param>
    protected Vector8JpegColorConverter(JpegColorSpace colorSpace, int precision)
        : base(colorSpace, precision, Vector8Size)
    {
    }

    /// <inheritdoc/>
    protected sealed override bool IsAvailable
    {
        get { return SimdUtils.HasVector8; }
    }
}
}
}

46
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs

@ -0,0 +1,46 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
/// <summary>
/// Base class for converters that have a vectorized code path.
/// <see cref="ConvertToRgba"/> splits the input into a leading part whose length is a
/// multiple of the vector size (handled by <see cref="ConvertCoreVectorized"/>) and a
/// trailing remainder (handled by <see cref="ConvertCore"/>).
/// </summary>
internal abstract class VectorizedJpegColorConverter : JpegColorConverter
{
    // Number of elements the vectorized path consumes per step.
    private readonly int vectorSize;

    /// <summary>
    /// Initializes a new instance of the <see cref="VectorizedJpegColorConverter"/> class.
    /// </summary>
    /// <param name="colorSpace">The color space, forwarded to the base class.</param>
    /// <param name="precision">The precision, forwarded to the base class.</param>
    /// <param name="vectorSize">The number of elements processed per vectorized step.</param>
    protected VectorizedJpegColorConverter(JpegColorSpace colorSpace, int precision, int vectorSize)
        : base(colorSpace, precision)
    {
        this.vectorSize = vectorSize;
    }

    /// <summary>
    /// Converts <paramref name="values"/> into <paramref name="result"/>, using the vectorized
    /// path for as many whole vectors as fit and the non-vectorized path for the rest.
    /// </summary>
    /// <param name="values">The input component values.</param>
    /// <param name="result">The destination buffer.</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown when at least one whole vector has to be processed but
    /// <c>IsAvailable</c> is <see langword="false"/>.
    /// </exception>
    public sealed override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
    {
        int remainder = result.Length % this.vectorSize;
        int simdCount = result.Length - remainder;

        if (simdCount > 0)
        {
            // The vectorized path must only run when the derived converter reports itself as available.
            // Inputs shorter than one vector never reach this check and go straight to ConvertCore.
            if (!this.IsAvailable)
            {
                throw new InvalidOperationException(
                    "This converter can be used only on architecture having 256 bit floating point SIMD registers!");
            }

            this.ConvertCoreVectorized(values.Slice(0, simdCount), result.Slice(0, simdCount));
        }

        // Trailing elements that do not fill a whole vector (possibly none).
        this.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder));
    }

    /// <summary>
    /// Converts a block whose length is a multiple of the vector size.
    /// </summary>
    /// <param name="values">The input component values.</param>
    /// <param name="result">The destination buffer.</param>
    protected abstract void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result);

    /// <summary>
    /// Converts the remaining elements that were not handed to the vectorized path.
    /// </summary>
    /// <param name="values">The input component values.</param>
    /// <param name="result">The destination buffer.</param>
    protected abstract void ConvertCore(in ComponentValues values, Span<Vector4> result);
}
}
}

99
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs

@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.Tuples;
@ -17,22 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
/// <summary>
/// The available converters
/// </summary>
private static readonly JpegColorConverter[] Converters =
{
// 8-bit converters
GetYCbCrConverter(8),
GetYccKConverter(8),
GetCmykConverter(8),
GetGrayScaleConverter(8),
GetRgbConverter(8),
// 12-bit converters
GetYCbCrConverter(12),
GetYccKConverter(12),
GetCmykConverter(12),
GetGrayScaleConverter(12),
GetRgbConverter(12),
};
private static readonly JpegColorConverter[] Converters = CreateConverters();
/// <summary>
/// Initializes a new instance of the <see cref="JpegColorConverter"/> class.
@ -45,6 +31,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
this.HalfValue = MathF.Ceiling(this.MaximumValue / 2);
}
/// <summary>
/// Gets a value indicating whether this <see cref="JpegColorConverter"/> is available
/// on the current runtime and CPU architecture.
/// </summary>
protected abstract bool IsAvailable { get; }
/// <summary>
/// Gets the <see cref="JpegColorSpace"/> of this converter.
/// </summary>
@ -89,34 +81,79 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
public abstract void ConvertToRgba(in ComponentValues values, Span<Vector4> result);
/// <summary>
/// Returns the <see cref="JpegColorConverter"/> for the YCbCr colorspace that matches the current CPU architecture.
/// Returns the <see cref="JpegColorConverter"/>s for all supported colorspaces and precisions.
/// </summary>
private static JpegColorConverter GetYCbCrConverter(int precision) =>
FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision);
private static JpegColorConverter[] CreateConverters()
{
    var converters = new List<JpegColorConverter>();

    // Register all candidates for 8-bit precision first, then for 12-bit precision.
    // Within one precision the color spaces are added in a fixed order:
    // YCbCr, YccK, CMYK, gray scale, RGB.
    foreach (int precision in new[] { 8, 12 })
    {
        converters.AddRange(GetYCbCrConverters(precision));
        converters.AddRange(GetYccKConverters(precision));
        converters.AddRange(GetCmykConverters(precision));
        converters.AddRange(GetGrayScaleConverters(precision));
        converters.AddRange(GetRgbConverters(precision));
    }

    // Keep only the converters that report themselves as available; relative order is preserved.
    return converters.Where(x => x.IsAvailable).ToArray();
}
/// <summary>
/// Returns the <see cref="JpegColorConverter"/> for the YccK colorspace that matches the current CPU architecture.
/// Returns the <see cref="JpegColorConverter"/>s for the YCbCr colorspace.
/// </summary>
private static JpegColorConverter GetYccKConverter(int precision) =>
FromYccKVector8.IsAvailable ? (JpegColorConverter)new FromYccKVector8(precision) : new FromYccKBasic(precision);
private static IEnumerable<JpegColorConverter> GetYCbCrConverters(int precision)
{
    // Candidates in order: Avx2, Vector8, Vector, Basic.
    return new JpegColorConverter[]
    {
        new FromYCbCrAvx2(precision),
        new FromYCbCrVector8(precision),
        new FromYCbCrVector(precision),
        new FromYCbCrBasic(precision),
    };
}
/// <summary>
/// Returns the <see cref="JpegColorConverter"/> for the CMYK colorspace that matches the current CPU architecture.
/// Returns the <see cref="JpegColorConverter"/>s for the YccK colorspace.
/// </summary>
private static JpegColorConverter GetCmykConverter(int precision) =>
FromCmykVector8.IsAvailable ? (JpegColorConverter)new FromCmykVector8(precision) : new FromCmykBasic(precision);
private static IEnumerable<JpegColorConverter> GetYccKConverters(int precision)
{
    // Candidates in order: Avx2, Vector8, Basic.
    return new JpegColorConverter[]
    {
        new FromYccKAvx2(precision),
        new FromYccKVector8(precision),
        new FromYccKBasic(precision),
    };
}
/// <summary>
/// Returns the <see cref="JpegColorConverter"/> for the gray scale colorspace that matches the current CPU architecture.
/// Returns the <see cref="JpegColorConverter"/>s for the CMYK colorspace.
/// </summary>
private static JpegColorConverter GetGrayScaleConverter(int precision) =>
FromGrayscaleVector8.IsAvailable ? (JpegColorConverter)new FromGrayscaleVector8(precision) : new FromGrayscaleBasic(precision);
private static IEnumerable<JpegColorConverter> GetCmykConverters(int precision)
{
    // Candidates in order: Avx2, Vector8, Basic.
    return new JpegColorConverter[]
    {
        new FromCmykAvx2(precision),
        new FromCmykVector8(precision),
        new FromCmykBasic(precision),
    };
}
/// <summary>
/// Returns the <see cref="JpegColorConverter"/> for the RGB colorspace that matches the current CPU architecture.
/// Returns the <see cref="JpegColorConverter"/>s for the gray scale colorspace.
/// </summary>
private static JpegColorConverter GetRgbConverter(int precision) =>
FromRgbVector8.IsAvailable ? (JpegColorConverter)new FromRgbVector8(precision) : new FromRgbBasic(precision);
private static IEnumerable<JpegColorConverter> GetGrayScaleConverters(int precision)
{
    // Candidates in order: Avx2, Vector8, Basic.
    return new JpegColorConverter[]
    {
        new FromGrayscaleAvx2(precision),
        new FromGrayscaleVector8(precision),
        new FromGrayscaleBasic(precision),
    };
}
/// <summary>
/// Returns the <see cref="JpegColorConverter"/>s for the RGB colorspace.
/// </summary>
private static IEnumerable<JpegColorConverter> GetRgbConverters(int precision)
{
    // Candidates in order: Avx2, Vector8, Basic.
    return new JpegColorConverter[]
    {
        new FromRgbAvx2(precision),
        new FromRgbVector8(precision),
        new FromRgbBasic(precision),
    };
}
/// <summary>
/// A stack-only struct to reference the input buffers using <see cref="ReadOnlySpan{T}"/>-s.

14
tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs

@ -41,7 +41,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
JpegColorConverter.FromYCbCrBasic.ConvertCore(values, this.output, 255F, 128F);
new JpegColorConverter.FromYCbCrBasic(8).ConvertToRgba(values, this.output);
}
[Benchmark(Baseline = true)]
@ -49,7 +49,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
JpegColorConverter.FromYCbCrSimd.ConvertCore(values, this.output, 255F, 128F);
new JpegColorConverter.FromYCbCrVector(8).ConvertToRgba(values, this.output);
}
[Benchmark]
@ -57,7 +57,15 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
JpegColorConverter.FromYCbCrSimdVector8.ConvertCore(values, this.output, 255F, 128F);
new JpegColorConverter.FromYCbCrVector8(8).ConvertToRgba(values, this.output);
}
[Benchmark]
public void SimdVectorAvx2()
{
    // Runs the 8-bit FromYCbCrAvx2 converter over the prepared input buffers.
    JpegColorConverter.ComponentValues componentValues = new JpegColorConverter.ComponentValues(this.input, 0);
    JpegColorConverter converter = new JpegColorConverter.FromYCbCrAvx2(8);

    converter.ConvertToRgba(componentValues, this.output);
}
private static Buffer2D<float>[] CreateRandomValues(

68
tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs

@ -22,6 +22,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
private static readonly ApproximateColorSpaceComparer ColorSpaceComparer = new ApproximateColorSpaceComparer(Precision);
// int inputBufferLength, int resultBufferLength, int seed
public static readonly TheoryData<int, int, int> CommonConversionData =
new TheoryData<int, int, int>
{
@ -51,44 +52,30 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
seed);
}
private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i)
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromYCbCrVector(int inputBufferLength, int resultBufferLength, int seed)
{
float y = values.Component0[i];
float cb = values.Component1[i];
float cr = values.Component2[i];
var ycbcr = new YCbCr(y, cb, cr);
Vector4 rgba = result[i];
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = ColorSpaceConverter.ToRgb(ycbcr);
Assert.Equal(expected, actual, ColorSpaceComparer);
Assert.Equal(1, rgba.W);
ValidateRgbToYCbCrConversion(
new JpegColorConverter.FromYCbCrVector(8),
3,
inputBufferLength,
resultBufferLength,
seed);
}
[Theory]
[InlineData(64, 1)]
[InlineData(16, 2)]
[InlineData(8, 3)]
public void FromYCbCrSimd_ConvertCore(int size, int seed)
[MemberData(nameof(CommonConversionData))]
public void FromYCbCrVector8(int inputBufferLength, int resultBufferLength, int seed)
{
JpegColorConverter.ComponentValues values = CreateRandomValues(3, size, seed);
var result = new Vector4[size];
JpegColorConverter.FromYCbCrSimd.ConvertCore(values, result, 255, 128);
for (int i = 0; i < size; i++)
if (!SimdUtils.HasVector8)
{
ValidateYCbCr(values, result, i);
this.Output.WriteLine("No AVX2 present, skipping test!");
return;
}
}
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromYCbCrSimd(int inputBufferLength, int resultBufferLength, int seed)
{
ValidateRgbToYCbCrConversion(
new JpegColorConverter.FromYCbCrSimd(8),
new JpegColorConverter.FromYCbCrVector8(8),
3,
inputBufferLength,
resultBufferLength,
@ -97,9 +84,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromYCbCrSimdAvx2(int inputBufferLength, int resultBufferLength, int seed)
public void FromYCbCrAvx2(int inputBufferLength, int resultBufferLength, int seed)
{
if (!SimdUtils.HasVector8)
if (!SimdUtils.HasAvx2)
{
this.Output.WriteLine("No AVX2 present, skipping test!");
return;
@ -107,7 +94,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
// JpegColorConverter.FromYCbCrSimdAvx2.LogPlz = s => this.Output.WriteLine(s);
ValidateRgbToYCbCrConversion(
new JpegColorConverter.FromYCbCrSimdVector8(8),
new JpegColorConverter.FromYCbCrAvx2(8),
3,
inputBufferLength,
resultBufferLength,
@ -138,7 +125,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1);
var result = new Vector4[count];
JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrSimd(8) : new JpegColorConverter.FromYCbCrBasic(8);
JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector(8) : new JpegColorConverter.FromYCbCrBasic(8);
// Warm up:
converter.ConvertToRgba(values, result);
@ -331,5 +318,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
ValidateYCbCr(values, result, i);
}
}
// Checks element i of result against the reference conversion of the i-th input
// (Component0, Component1, Component2) and asserts that W equals 1.
private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i)
{
    // Reference value computed from the i-th input triple.
    var source = new YCbCr(values.Component0[i], values.Component1[i], values.Component2[i]);
    var expected = ColorSpaceConverter.ToRgb(source);

    // Value produced by the converter under test.
    Vector4 pixel = result[i];
    var actual = new Rgb(pixel.X, pixel.Y, pixel.Z);

    Assert.Equal(expected, actual, ColorSpaceComparer);
    Assert.Equal(1, pixel.W);
}
}
}

Loading…
Cancel
Save