diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs similarity index 75% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs index 7b257b37da..117bb2c3fd 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs @@ -8,16 +8,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromCmyk : JpegColorConverter + internal sealed class FromCmykBasic : JpegColorConverter { - public FromCmyk(int precision) + public FromCmykBasic(int precision) : base(JpegColorSpace.Cmyk, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) { - // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { ReadOnlySpan cVals = values.Component0; ReadOnlySpan mVals = values.Component1; ReadOnlySpan yVals = values.Component2; @@ -25,7 +29,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters var v = new Vector4(0, 0, 0, 1F); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) @@ -33,7 +37,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters float c = cVals[i]; float m = mVals[i]; float y = yVals[i]; - float k = kVals[i] / this.MaximumValue; + float k = kVals[i] / maxValue; v.X = c * k; v.Y = m * k; @@ -47,4 +51,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs new file mode 100644 index 0000000000..afd36073ab --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs @@ -0,0 +1,145 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromCmykVector8 : JpegColorConverter + { + public FromCmykVector8(int precision) + : base(JpegColorSpace.Cmyk, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); + } + + FromCmykBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / maxValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol); + Vector256 c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol); + Vector256 m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol); + Vector256 y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol); + + k = Avx.Multiply(k, scale); + + c = Avx.Multiply(Avx.Multiply(c, k), scale); + m = Avx.Multiply(Avx.Multiply(m, k), scale); + y = Avx.Multiply(Avx.Multiply(y, k), scale); + + Vector256 cmLo = Avx.UnpackLow(c, m); + Vector256 yoLo = Avx.UnpackLow(y, one); + Vector256 cmHi = Avx.UnpackHigh(c, m); + Vector256 yoHi = Avx.UnpackHigh(y, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10); + } +#else + ref Vector cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair cc = default; + Vector4Pair mm = default; + Vector4Pair yy = default; + ref Vector ccRefAsVector = ref Unsafe.As>(ref cc); + ref Vector mmRefAsVector = ref Unsafe.As>(ref mm); + ref Vector yyRefAsVector = ref Unsafe.As>(ref yy); + + var scale = new Vector(1 / maxValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector c = Unsafe.Add(ref cBase, i); + Vector m = Unsafe.Add(ref mBase, i); + Vector y = Unsafe.Add(ref yBase, i); + Vector k = Unsafe.Add(ref kBase, i) * scale; + + c = (c * k) * scale; + m = (m * k) * scale; + y = (y * k) * scale; + + ccRefAsVector = c; + mmRefAsVector = m; + yyRefAsVector = y; + + // Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref cc, ref mm, ref yy); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs similarity index 74% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs index cf0bc2c920..459a33a966 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs @@ -10,16 +10,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromGrayscale : JpegColorConverter + internal sealed class FromGrayscaleBasic : JpegColorConverter { - public FromGrayscale(int precision) + public FromGrayscaleBasic(int precision) : base(JpegColorSpace.Grayscale, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) { - var maximum = 1 / this.MaximumValue; + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); ref float sBase = ref MemoryMarshal.GetReference(values.Component0); @@ -35,4 +40,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs new file mode 100644 index 0000000000..c25057c6f5 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs @@ -0,0 +1,109 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromGrayscaleVector8 : JpegColorConverter + { + public FromGrayscaleVector8(int precision) + : base(JpegColorSpace.Grayscale, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); + } + + FromGrayscaleBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / maxValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol); + + g = Avx.Multiply(g, scale); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); + Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000); + Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000); + Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000); + } +#else + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair gg = default; + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + + var scale = new Vector(1 / maxValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector g = Unsafe.Add(ref gBase, i); + g *= scale; + + ggRefAsVector = g; + + // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref gg); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs similarity index 76% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs index 25889a6dfc..f9faf14353 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs @@ -8,23 +8,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromRgb : JpegColorConverter + internal sealed class FromRgbBasic : JpegColorConverter { - public FromRgb(int precision) + public FromRgbBasic(int precision) : base(JpegColorSpace.RGB, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) { - // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { ReadOnlySpan rVals = values.Component0; ReadOnlySpan gVals = values.Component1; ReadOnlySpan bVals = values.Component2; var v = new Vector4(0, 0, 0, 1); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) @@ -44,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs new file mode 100644 index 0000000000..ebaa512119 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs @@ -0,0 +1,132 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromRgbVector8 : JpegColorConverter + { + public FromRgbVector8(int precision) + : base(JpegColorSpace.RGB, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); + } + + FromRgbBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / maxValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale); + Vector256 g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale); + Vector256 b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale); + + Vector256 rgLo = Avx.UnpackLow(r, g); + Vector256 boLo = Avx.UnpackLow(b, one); + Vector256 rgHi = Avx.UnpackHigh(r, g); + Vector256 boHi = Avx.UnpackHigh(b, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10); + } +#else + ref Vector rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / maxValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector r = Unsafe.Add(ref rBase, i); + Vector g = Unsafe.Add(ref gBase, i); + Vector b = Unsafe.Add(ref bBase, i); + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index 1319b56ee0..05258fb9b9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -9,8 +9,9 @@ using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using static SixLabors.ImageSharp.SimdUtils; -#endif +#else using SixLabors.ImageSharp.Tuples; +#endif // ReSharper disable ImpureMethodCallOnReadonlyValueField namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters @@ -98,7 +99,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); - // TODO: We should be savving to RGBA not Vector4 + // TODO: We should be saving to RGBA not Vector4 r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs similarity index 57% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs index 1137cdc0ec..1008889566 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs @@ -8,14 +8,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYccK : JpegColorConverter + internal sealed class FromYccKBasic : JpegColorConverter { - public FromYccK(int precision) + public FromYccKBasic(int precision) : base(JpegColorSpace.Ycck, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) + { + ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) { // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! ReadOnlySpan yVals = values.Component0; @@ -25,19 +30,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters var v = new Vector4(0, 0, 0, 1F); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) { float y = yVals[i]; - float cb = cbVals[i] - this.HalfValue; - float cr = crVals[i] - this.HalfValue; - float k = kVals[i] / this.MaximumValue; + float cb = cbVals[i] - halfValue; + float cr = crVals[i] - halfValue; + float k = kVals[i] / maxValue; - v.X = (this.MaximumValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; - v.Y = (this.MaximumValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k; - v.Z = (this.MaximumValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; + v.X = (maxValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; + v.Y = (maxValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k; + v.Z = (maxValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; v.W = 1F; v *= scale; @@ -47,4 +52,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs new file mode 100644 index 0000000000..8645fb8fa4 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs @@ -0,0 +1,193 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYccKVector8 : JpegColorConverter + { + public FromYccKVector8(int precision) + : base(JpegColorSpace.Ycck, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); + } + + FromYccKBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-halfValue); + var scale = Vector256.Create(1 / maxValue); + var max = Vector256.Create(maxValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + Vector256 k = Avx.Divide(Unsafe.Add(ref kBase, i), max); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + k = Avx2.PermuteVar8x32(k, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + r = Avx.Subtract(max, Avx.RoundToNearestInteger(r)); + g = Avx.Subtract(max, Avx.RoundToNearestInteger(g)); + b = Avx.Subtract(max, Avx.RoundToNearestInteger(b)); + + r = Avx.Multiply(Avx.Multiply(r, k), scale); + g = Avx.Multiply(Avx.Multiply(g, k), scale); + b = Avx.Multiply(Avx.Multiply(b, k), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#else + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + var chromaOffset = new Vector(-halfValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / maxValue); + var max = new Vector(maxValue); + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector y = Unsafe.Add(ref yBase, i); + Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; + Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + Vector k = Unsafe.Add(ref kBase, i) / max; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + r = (max - r.FastRound()) * k; + g = (max - g.FastRound()) * k; + b = (max - b.FastRound()) * k; + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index 7c780700c9..c49ec7e387 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -21,17 +21,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { // 8-bit converters GetYCbCrConverter(8), - new FromYccK(8), - new FromCmyk(8), - new FromGrayscale(8), - new FromRgb(8), + GetYccKConverter(8), + GetCmykConverter(8), + GetGrayScaleConverter(8), + GetRgbConverter(8), // 12-bit converters GetYCbCrConverter(12), - new FromYccK(12), - new FromCmyk(12), - new FromGrayscale(12), - new FromRgb(12), + GetYccKConverter(12), + GetCmykConverter(12), + GetGrayScaleConverter(12), + GetRgbConverter(12), }; /// @@ -94,6 +94,30 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters private static JpegColorConverter GetYCbCrConverter(int precision) => FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision); + /// + /// Returns the for the YccK colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetYccKConverter(int precision) => + FromYccKVector8.IsAvailable ? (JpegColorConverter)new FromYccKVector8(precision) : new FromYccKBasic(precision); + + /// + /// Returns the for the CMYK colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetCmykConverter(int precision) => + FromCmykVector8.IsAvailable ? (JpegColorConverter)new FromCmykVector8(precision) : new FromCmykBasic(precision); + + /// + /// Returns the for the gray scale colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetGrayScaleConverter(int precision) => + FromGrayscaleVector8.IsAvailable ? (JpegColorConverter)new FromGrayscaleVector8(precision) : new FromGrayscaleBasic(precision); + + /// + /// Returns the for the RGB colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetRgbConverter(int precision) => + FromRgbVector8.IsAvailable ? (JpegColorConverter)new FromRgbVector8(precision) : new FromRgbBasic(precision); + /// /// A stack-only struct to reference the input buffers using -s. /// @@ -229,6 +253,52 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters this.V7.Z = b.B.W; this.V7.W = 1f; } + + /// + /// Pack (g0,g1...g7) vector values as (g0,g0,g0,1), (g1,g1,g1,1) ... + /// + public void Pack(ref Vector4Pair g) + { + this.V0.X = g.A.X; + this.V0.Y = g.A.X; + this.V0.Z = g.A.X; + this.V0.W = 1f; + + this.V1.X = g.A.Y; + this.V1.Y = g.A.Y; + this.V1.Z = g.A.Y; + this.V1.W = 1f; + + this.V2.X = g.A.Z; + this.V2.Y = g.A.Z; + this.V2.Z = g.A.Z; + this.V2.W = 1f; + + this.V3.X = g.A.W; + this.V3.Y = g.A.W; + this.V3.Z = g.A.W; + this.V3.W = 1f; + + this.V4.X = g.B.X; + this.V4.Y = g.B.X; + this.V4.Z = g.B.X; + this.V4.W = 1f; + + this.V5.X = g.B.Y; + this.V5.Y = g.B.Y; + this.V5.Z = g.B.Y; + this.V5.W = 1f; + + this.V6.X = g.B.Z; + this.V6.Y = g.B.Z; + this.V6.Z = g.B.Z; + this.V6.W = 1f; + + this.V7.X = g.B.W; + this.V7.Y = g.B.W; + this.V7.Z = g.B.W; + this.V7.W = 1f; + } } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 860f9c396c..9abe318846 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -183,7 +183,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); var expected = new Rgb(v.X, v.Y, v.Z); - Assert.Equal(expected, actual); + Assert.Equal(expected, actual, ColorSpaceComparer); Assert.Equal(1, rgba.W); } }