Browse Source

Add initial vectorized color converter implementation

pull/1411/head
Nicolas Portmann 6 years ago
parent
commit
210d8f7740
  1. 16
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs
  2. 145
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs
  3. 13
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs
  4. 109
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs
  5. 14
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs
  6. 132
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs
  7. 5
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
  8. 25
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs
  9. 193
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs
  10. 86
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
  11. 2
      tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs

16
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs → src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs

@ -8,16 +8,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromCmyk : JpegColorConverter
internal sealed class FromCmykBasic : JpegColorConverter
{
public FromCmyk(int precision)
public FromCmykBasic(int precision)
: base(JpegColorSpace.Cmyk, precision)
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
// TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
ConvertCore(values, result, this.MaximumValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
{
ReadOnlySpan<float> cVals = values.Component0;
ReadOnlySpan<float> mVals = values.Component1;
ReadOnlySpan<float> yVals = values.Component2;
@ -25,7 +29,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
var v = new Vector4(0, 0, 0, 1F);
var maximum = 1 / this.MaximumValue;
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
for (int i = 0; i < result.Length; i++)
@ -33,7 +37,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
float c = cVals[i];
float m = mVals[i];
float y = yVals[i];
float k = kVals[i] / this.MaximumValue;
float k = kVals[i] / maxValue;
v.X = c * k;
v.Y = m * k;
@ -47,4 +51,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
}
}
}
}
}

145
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs

@ -0,0 +1,145 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#else
using SixLabors.ImageSharp.Tuples;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromCmykVector8 : JpegColorConverter
{
public FromCmykVector8(int precision)
: base(JpegColorSpace.Cmyk, precision)
{
}
public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
int remainder = result.Length % 8;
int simdCount = result.Length - remainder;
if (simdCount > 0)
{
ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue);
}
FromCmykBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
{
// This implementation is actually AVX specific.
// An AVX register is capable of storing 8 float-s.
if (!IsAvailable)
{
throw new InvalidOperationException(
"JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!");
}
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> cBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> mBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> yBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> kBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var scale = Vector256.Create(1 / maxValue);
var one = Vector256.Create(1F);
// Used for packing
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector256<float> k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol);
Vector256<float> c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol);
Vector256<float> m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol);
Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol);
k = Avx.Multiply(k, scale);
c = Avx.Multiply(Avx.Multiply(c, k), scale);
m = Avx.Multiply(Avx.Multiply(m, k), scale);
y = Avx.Multiply(Avx.Multiply(y, k), scale);
Vector256<float> cmLo = Avx.UnpackLow(c, m);
Vector256<float> yoLo = Avx.UnpackLow(y, one);
Vector256<float> cmHi = Avx.UnpackHigh(c, m);
Vector256<float> yoHi = Avx.UnpackHigh(y, one);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00);
Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10);
Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00);
Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10);
}
#else
ref Vector<float> cBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> mBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> yBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector<float> kBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
Vector4Pair cc = default;
Vector4Pair mm = default;
Vector4Pair yy = default;
ref Vector<float> ccRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref cc);
ref Vector<float> mmRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref mm);
ref Vector<float> yyRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref yy);
var scale = new Vector<float>(1 / maxValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector<float> c = Unsafe.Add(ref cBase, i);
Vector<float> m = Unsafe.Add(ref mBase, i);
Vector<float> y = Unsafe.Add(ref yBase, i);
Vector<float> k = Unsafe.Add(ref kBase, i) * scale;
c = (c * k) * scale;
m = (m * k) * scale;
y = (y * k) * scale;
ccRefAsVector = c;
mmRefAsVector = m;
yyRefAsVector = y;
// Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref cc, ref mm, ref yy);
}
#endif
}
}
}
}

13
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs → src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs

@ -10,16 +10,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromGrayscale : JpegColorConverter
internal sealed class FromGrayscaleBasic : JpegColorConverter
{
public FromGrayscale(int precision)
public FromGrayscaleBasic(int precision)
: base(JpegColorSpace.Grayscale, precision)
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
var maximum = 1 / this.MaximumValue;
ConvertCore(values, result, this.MaximumValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
{
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
ref float sBase = ref MemoryMarshal.GetReference(values.Component0);
@ -35,4 +40,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
}
}
}
}
}

109
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs

@ -0,0 +1,109 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#else
using SixLabors.ImageSharp.Tuples;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromGrayscaleVector8 : JpegColorConverter
{
public FromGrayscaleVector8(int precision)
: base(JpegColorSpace.Grayscale, precision)
{
}
public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
int remainder = result.Length % 8;
int simdCount = result.Length - remainder;
if (simdCount > 0)
{
ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue);
}
FromGrayscaleBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
{
// This implementation is actually AVX specific.
// An AVX register is capable of storing 8 float-s.
if (!IsAvailable)
{
throw new InvalidOperationException(
"JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!");
}
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> gBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var scale = Vector256.Create(1 / maxValue);
var one = Vector256.Create(1F);
// Used for packing
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector256<float> g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol);
g = Avx.Multiply(g, scale);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000);
Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000);
Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000);
Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000);
}
#else
ref Vector<float> gBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
Vector4Pair gg = default;
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
var scale = new Vector<float>(1 / maxValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector<float> g = Unsafe.Add(ref gBase, i);
g *= scale;
ggRefAsVector = g;
// Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref gg);
}
#endif
}
}
}
}

14
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs → src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs

@ -8,23 +8,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromRgb : JpegColorConverter
internal sealed class FromRgbBasic : JpegColorConverter
{
public FromRgb(int precision)
public FromRgbBasic(int precision)
: base(JpegColorSpace.RGB, precision)
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
// TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
ConvertCore(values, result, this.MaximumValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
{
ReadOnlySpan<float> rVals = values.Component0;
ReadOnlySpan<float> gVals = values.Component1;
ReadOnlySpan<float> bVals = values.Component2;
var v = new Vector4(0, 0, 0, 1);
var maximum = 1 / this.MaximumValue;
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
for (int i = 0; i < result.Length; i++)
@ -44,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
}
}
}
}
}

132
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs

@ -0,0 +1,132 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#else
using SixLabors.ImageSharp.Tuples;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromRgbVector8 : JpegColorConverter
{
public FromRgbVector8(int precision)
: base(JpegColorSpace.RGB, precision)
{
}
public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
int remainder = result.Length % 8;
int simdCount = result.Length - remainder;
if (simdCount > 0)
{
ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue);
}
FromRgbBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
{
// This implementation is actually AVX specific.
// An AVX register is capable of storing 8 float-s.
if (!IsAvailable)
{
throw new InvalidOperationException(
"JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!");
}
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> rBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> gBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> bBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var scale = Vector256.Create(1 / maxValue);
var one = Vector256.Create(1F);
// Used for packing
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector256<float> r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale);
Vector256<float> g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale);
Vector256<float> b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale);
Vector256<float> rgLo = Avx.UnpackLow(r, g);
Vector256<float> boLo = Avx.UnpackLow(b, one);
Vector256<float> rgHi = Avx.UnpackHigh(r, g);
Vector256<float> boHi = Avx.UnpackHigh(b, one);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00);
Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10);
Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00);
Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10);
}
#else
ref Vector<float> rBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> gBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> bBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
Vector4Pair rr = default;
Vector4Pair gg = default;
Vector4Pair bb = default;
ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
var scale = new Vector<float>(1 / maxValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector<float> r = Unsafe.Add(ref rBase, i);
Vector<float> g = Unsafe.Add(ref gBase, i);
Vector<float> b = Unsafe.Add(ref bBase, i);
r *= scale;
g *= scale;
b *= scale;
rrRefAsVector = r;
ggRefAsVector = g;
bbRefAsVector = b;
// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
}
#endif
}
}
}
}

5
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs

@ -9,8 +9,9 @@ using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
#else
using SixLabors.ImageSharp.Tuples;
#endif
// ReSharper disable ImpureMethodCallOnReadonlyValueField
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
@ -98,7 +99,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
Vector256<float> g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);
// TODO: We should be savving to RGBA not Vector4
// TODO: We should be saving to RGBA not Vector4
r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale);
g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale);
b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale);

25
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs → src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs

@ -8,14 +8,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYccK : JpegColorConverter
internal sealed class FromYccKBasic : JpegColorConverter
{
public FromYccK(int precision)
public FromYccKBasic(int precision)
: base(JpegColorSpace.Ycck, precision)
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
ConvertCore(values, result, this.MaximumValue, this.HalfValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
{
// TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
ReadOnlySpan<float> yVals = values.Component0;
@ -25,19 +30,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
var v = new Vector4(0, 0, 0, 1F);
var maximum = 1 / this.MaximumValue;
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
for (int i = 0; i < result.Length; i++)
{
float y = yVals[i];
float cb = cbVals[i] - this.HalfValue;
float cr = crVals[i] - this.HalfValue;
float k = kVals[i] / this.MaximumValue;
float cb = cbVals[i] - halfValue;
float cr = crVals[i] - halfValue;
float k = kVals[i] / maxValue;
v.X = (this.MaximumValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k;
v.Y = (this.MaximumValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k;
v.Z = (this.MaximumValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k;
v.X = (maxValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k;
v.Y = (maxValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k;
v.Z = (maxValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k;
v.W = 1F;
v *= scale;
@ -47,4 +52,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
}
}
}
}
}

193
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs

@ -0,0 +1,193 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#else
using SixLabors.ImageSharp.Tuples;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYccKVector8 : JpegColorConverter
{
public FromYccKVector8(int precision)
: base(JpegColorSpace.Ycck, precision)
{
}
public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
int remainder = result.Length % 8;
int simdCount = result.Length - remainder;
if (simdCount > 0)
{
ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue);
}
FromYccKBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
{
// This implementation is actually AVX specific.
// An AVX register is capable of storing 8 float-s.
if (!IsAvailable)
{
throw new InvalidOperationException(
"JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!");
}
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> yBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> cbBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> crBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> kBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var chromaOffset = Vector256.Create(-halfValue);
var scale = Vector256.Create(1 / maxValue);
var max = Vector256.Create(maxValue);
var rCrMult = Vector256.Create(1.402F);
var gCbMult = Vector256.Create(-0.344136F);
var gCrMult = Vector256.Create(-0.714136F);
var bCbMult = Vector256.Create(1.772F);
// Used for packing.
var va = Vector256.Create(1F);
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
// k = kVals[i] / 256F;
Vector256<float> y = Unsafe.Add(ref yBase, i);
Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
Vector256<float> k = Avx.Divide(Unsafe.Add(ref kBase, i), max);
y = Avx2.PermuteVar8x32(y, vcontrol);
cb = Avx2.PermuteVar8x32(cb, vcontrol);
cr = Avx2.PermuteVar8x32(cr, vcontrol);
k = Avx2.PermuteVar8x32(k, vcontrol);
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult);
Vector256<float> g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);
r = Avx.Subtract(max, Avx.RoundToNearestInteger(r));
g = Avx.Subtract(max, Avx.RoundToNearestInteger(g));
b = Avx.Subtract(max, Avx.RoundToNearestInteger(b));
r = Avx.Multiply(Avx.Multiply(r, k), scale);
g = Avx.Multiply(Avx.Multiply(g, k), scale);
b = Avx.Multiply(Avx.Multiply(b, k), scale);
Vector256<float> vte = Avx.UnpackLow(r, b);
Vector256<float> vto = Avx.UnpackLow(g, va);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
vte = Avx.UnpackHigh(r, b);
vto = Avx.UnpackHigh(g, va);
Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
}
#else
ref Vector<float> yBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> cbBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> crBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector<float> kBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
var chromaOffset = new Vector<float>(-halfValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
Vector4Pair rr = default;
Vector4Pair gg = default;
Vector4Pair bb = default;
ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
var scale = new Vector<float>(1 / maxValue);
var max = new Vector<float>(maxValue);
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
// k = kVals[i] / 256F;
Vector<float> y = Unsafe.Add(ref yBase, i);
Vector<float> cb = Unsafe.Add(ref cbBase, i) + chromaOffset;
Vector<float> cr = Unsafe.Add(ref crBase, i) + chromaOffset;
Vector<float> k = Unsafe.Add(ref kBase, i) / max;
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector<float> r = y + (cr * new Vector<float>(1.402F));
Vector<float> g = y - (cb * new Vector<float>(0.344136F)) - (cr * new Vector<float>(0.714136F));
Vector<float> b = y + (cb * new Vector<float>(1.772F));
r = (max - r.FastRound()) * k;
g = (max - g.FastRound()) * k;
b = (max - b.FastRound()) * k;
r *= scale;
g *= scale;
b *= scale;
rrRefAsVector = r;
ggRefAsVector = g;
bbRefAsVector = b;
// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
}
#endif
}
}
}
}

86
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs

@ -21,17 +21,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
// 8-bit converters
GetYCbCrConverter(8),
new FromYccK(8),
new FromCmyk(8),
new FromGrayscale(8),
new FromRgb(8),
GetYccKConverter(8),
GetCmykConverter(8),
GetGrayScaleConverter(8),
GetRgbConverter(8),
// 12-bit converters
GetYCbCrConverter(12),
new FromYccK(12),
new FromCmyk(12),
new FromGrayscale(12),
new FromRgb(12),
GetYccKConverter(12),
GetCmykConverter(12),
GetGrayScaleConverter(12),
GetRgbConverter(12),
};
/// <summary>
@ -94,6 +94,30 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
private static JpegColorConverter GetYCbCrConverter(int precision) =>
FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision);
/// <summary>
/// Returns the <see cref="JpegColorConverter"/> for the YccK colorspace that matches the current CPU architecture.
/// </summary>
private static JpegColorConverter GetYccKConverter(int precision) =>
FromYccKVector8.IsAvailable ? (JpegColorConverter)new FromYccKVector8(precision) : new FromYccKBasic(precision);
/// <summary>
/// Returns the <see cref="JpegColorConverter"/> for the CMYK colorspace that matches the current CPU architecture.
/// </summary>
private static JpegColorConverter GetCmykConverter(int precision) =>
FromCmykVector8.IsAvailable ? (JpegColorConverter)new FromCmykVector8(precision) : new FromCmykBasic(precision);
/// <summary>
/// Returns the <see cref="JpegColorConverter"/> for the gray scale colorspace that matches the current CPU architecture.
/// </summary>
private static JpegColorConverter GetGrayScaleConverter(int precision) =>
FromGrayscaleVector8.IsAvailable ? (JpegColorConverter)new FromGrayscaleVector8(precision) : new FromGrayscaleBasic(precision);
/// <summary>
/// Returns the <see cref="JpegColorConverter"/> for the RGB colorspace that matches the current CPU architecture.
/// </summary>
private static JpegColorConverter GetRgbConverter(int precision) =>
FromRgbVector8.IsAvailable ? (JpegColorConverter)new FromRgbVector8(precision) : new FromRgbBasic(precision);
/// <summary>
/// A stack-only struct to reference the input buffers using <see cref="ReadOnlySpan{T}"/>-s.
/// </summary>
@ -229,6 +253,52 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
this.V7.Z = b.B.W;
this.V7.W = 1f;
}
/// <summary>
/// Pack (g0,g1...g7) vector values as (g0,g0,g0,1), (g1,g1,g1,1) ...
/// </summary>
public void Pack(ref Vector4Pair g)
{
this.V0.X = g.A.X;
this.V0.Y = g.A.X;
this.V0.Z = g.A.X;
this.V0.W = 1f;
this.V1.X = g.A.Y;
this.V1.Y = g.A.Y;
this.V1.Z = g.A.Y;
this.V1.W = 1f;
this.V2.X = g.A.Z;
this.V2.Y = g.A.Z;
this.V2.Z = g.A.Z;
this.V2.W = 1f;
this.V3.X = g.A.W;
this.V3.Y = g.A.W;
this.V3.Z = g.A.W;
this.V3.W = 1f;
this.V4.X = g.B.X;
this.V4.Y = g.B.X;
this.V4.Z = g.B.X;
this.V4.W = 1f;
this.V5.X = g.B.Y;
this.V5.Y = g.B.Y;
this.V5.Z = g.B.Y;
this.V5.W = 1f;
this.V6.X = g.B.Z;
this.V6.Y = g.B.Z;
this.V6.Z = g.B.Z;
this.V6.W = 1f;
this.V7.X = g.B.W;
this.V7.Y = g.B.W;
this.V7.Z = g.B.W;
this.V7.W = 1f;
}
}
}
}

2
tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs

@ -183,7 +183,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = new Rgb(v.X, v.Y, v.Z);
Assert.Equal(expected, actual);
Assert.Equal(expected, actual, ColorSpaceComparer);
Assert.Equal(1, rgba.W);
}
}

Loading…
Cancel
Save