diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index df533cedf..073a5a9f6 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -25,6 +25,18 @@ namespace SixLabors.ImageSharp public static bool HasVector8 { get; } = Vector.IsHardwareAccelerated && Vector.Count == 8 && Vector.Count == 8; + public static bool HasAvx2 + { + get + { +#if SUPPORTS_RUNTIME_INTRINSICS + return Avx2.IsSupported; +#else + return false; +#endif + } + } + /// /// Transform all scalars in 'v' in a way that converting them to would have rounding semantics. /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs new file mode 100644 index 000000000..ef144fc1c --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs @@ -0,0 +1,22 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class Avx2JpegColorConverter : VectorizedJpegColorConverter + { + protected Avx2JpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision, 8) + { + } + + protected sealed override bool IsAvailable => SimdUtils.HasAvx2; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs new file mode 100644 index 000000000..ed2e2cd76 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs @@ -0,0 +1,18 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class BasicJpegColorConverter : JpegColorConverter + { + protected BasicJpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision) + { + } + + protected override bool IsAvailable => true; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs new file mode 100644 index 000000000..a9e3e5b51 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs @@ -0,0 +1,81 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromCmykAvx2 : Avx2JpegColorConverter + { + public FromCmykAvx2(int precision) + : base(JpegColorSpace.Cmyk, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol); + Vector256 c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol); + Vector256 m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol); + Vector256 y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol); + + k = Avx.Multiply(k, scale); + + c = Avx.Multiply(Avx.Multiply(c, k), scale); + m = Avx.Multiply(Avx.Multiply(m, k), scale); + y = Avx.Multiply(Avx.Multiply(y, k), scale); + + Vector256 cmLo = Avx.UnpackLow(c, m); + Vector256 yoLo = Avx.UnpackLow(y, one); + Vector256 cmHi = Avx.UnpackHigh(c, m); + Vector256 yoHi = Avx.UnpackHigh(y, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromCmykBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs index 117bb2c3f..6cbd52ec3 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromCmykBasic : JpegColorConverter + internal sealed class FromCmykBasic : BasicJpegColorConverter { public FromCmykBasic(int precision) : base(JpegColorSpace.Cmyk, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs deleted file mode 100644 index afd36073a..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromCmykVector8 : JpegColorConverter - { - public FromCmykVector8(int precision) - : base(JpegColorSpace.Cmyk, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); - } - - FromCmykBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 cBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 mBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector256 kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var scale = Vector256.Create(1 / maxValue); - var one = Vector256.Create(1F); - - // Used for packing - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector256 k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol); - Vector256 c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol); - Vector256 m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol); - Vector256 y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol); - - k = Avx.Multiply(k, scale); - - c = Avx.Multiply(Avx.Multiply(c, k), scale); - m = Avx.Multiply(Avx.Multiply(m, k), scale); - y = Avx.Multiply(Avx.Multiply(y, k), scale); - - Vector256 cmLo = Avx.UnpackLow(c, m); - Vector256 yoLo = Avx.UnpackLow(y, one); - Vector256 cmHi = Avx.UnpackHigh(c, m); - Vector256 yoHi = Avx.UnpackHigh(y, one); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00); - Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10); - Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00); - Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10); - } -#else - ref Vector cBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector mBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - Vector4Pair cc = default; - Vector4Pair mm = default; - Vector4Pair yy = default; - ref Vector ccRefAsVector = ref Unsafe.As>(ref cc); - ref Vector mmRefAsVector = ref Unsafe.As>(ref mm); - ref Vector yyRefAsVector = ref Unsafe.As>(ref yy); - - var scale = new Vector(1 / maxValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector c = Unsafe.Add(ref cBase, i); - Vector m = Unsafe.Add(ref mBase, i); - Vector y = Unsafe.Add(ref yBase, i); - Vector k = Unsafe.Add(ref kBase, i) * scale; - - c = (c * k) * scale; - m = (m * k) * scale; - y = (y * k) * scale; - - ccRefAsVector = c; - mmRefAsVector = m; - yyRefAsVector = y; - - // Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref cc, ref mm, ref yy); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs new file mode 100644 index 000000000..e75634b0f --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs @@ -0,0 +1,71 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromCmykVector8 : Vector8JpegColorConverter + { + public FromCmykVector8(int precision) + : base(JpegColorSpace.Cmyk, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair cc = default; + Vector4Pair mm = default; + Vector4Pair yy = default; + ref Vector ccRefAsVector = ref Unsafe.As>(ref cc); + ref Vector mmRefAsVector = ref Unsafe.As>(ref mm); + ref Vector yyRefAsVector = ref Unsafe.As>(ref yy); + + var scale = new Vector(1 / this.MaximumValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector c = Unsafe.Add(ref cBase, i); + Vector m = Unsafe.Add(ref mBase, i); + Vector y = Unsafe.Add(ref yBase, i); + Vector k = Unsafe.Add(ref kBase, i) * scale; + + c = (c * k) * scale; + m = (m * k) * scale; + y = (y * k) * scale; + + ccRefAsVector = c; + mmRefAsVector = m; + yyRefAsVector = y; + + // Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref cc, ref mm, ref yy); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromCmykBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs new file mode 100644 index 000000000..aebffc3df --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs @@ -0,0 +1,63 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromGrayscaleAvx2 : Avx2JpegColorConverter + { + public FromGrayscaleAvx2(int precision) + : base(JpegColorSpace.Grayscale, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol); + + g = Avx.Multiply(g, scale); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); + Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000); + Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000); + Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs index 459a33a96..0b7a220d9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs @@ -10,7 +10,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromGrayscaleBasic : JpegColorConverter + internal sealed class FromGrayscaleBasic : BasicJpegColorConverter { public FromGrayscaleBasic(int precision) : base(JpegColorSpace.Grayscale, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs deleted file mode 100644 index c25057c6f..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromGrayscaleVector8 : JpegColorConverter - { - public FromGrayscaleVector8(int precision) - : base(JpegColorSpace.Grayscale, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); - } - - FromGrayscaleBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var scale = Vector256.Create(1 / maxValue); - var one = Vector256.Create(1F); - - // Used for packing - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector256 g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol); - - g = Avx.Multiply(g, scale); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); - Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000); - Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000); - Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000); - } -#else - ref Vector gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - Vector4Pair gg = default; - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - - var scale = new Vector(1 / maxValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector g = Unsafe.Add(ref gBase, i); - g *= scale; - - ggRefAsVector = g; - - // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref gg); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs new file mode 100644 index 000000000..75ea60101 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs @@ -0,0 +1,53 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromGrayscaleVector8 : Vector8JpegColorConverter + { + public FromGrayscaleVector8(int precision) + : base(JpegColorSpace.Grayscale, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair gg = default; + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + + var scale = new Vector(1 / this.MaximumValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector g = Unsafe.Add(ref gBase, i); + g *= scale; + + ggRefAsVector = g; + + // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref gg); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs new file mode 100644 index 000000000..8f04c9152 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs @@ -0,0 +1,72 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromRgbAvx2 : Avx2JpegColorConverter + { + public FromRgbAvx2(int precision) + : base(JpegColorSpace.RGB, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale); + Vector256 g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale); + Vector256 b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale); + + Vector256 rgLo = Avx.UnpackLow(r, g); + Vector256 boLo = Avx.UnpackLow(b, one); + Vector256 rgHi = Avx.UnpackHigh(r, g); + Vector256 boHi = Avx.UnpackHigh(b, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromRgbBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs index f9faf1435..ddca3fe2f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromRgbBasic : JpegColorConverter + internal sealed class FromRgbBasic : BasicJpegColorConverter { public FromRgbBasic(int precision) : base(JpegColorSpace.RGB, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs deleted file mode 100644 index ebaa51211..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromRgbVector8 : JpegColorConverter - { - public FromRgbVector8(int precision) - : base(JpegColorSpace.RGB, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); - } - - FromRgbBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 rBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 bBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var scale = Vector256.Create(1 / maxValue); - var one = Vector256.Create(1F); - - // Used for packing - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector256 r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale); - Vector256 g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale); - Vector256 b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale); - - Vector256 rgLo = Avx.UnpackLow(r, g); - Vector256 boLo = Avx.UnpackLow(b, one); - Vector256 rgHi = Avx.UnpackHigh(r, g); - Vector256 boHi = Avx.UnpackHigh(b, one); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00); - Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10); - Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00); - Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10); - } -#else - ref Vector rBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector bBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); - - var scale = new Vector(1 / maxValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector r = Unsafe.Add(ref rBase, i); - Vector g = Unsafe.Add(ref gBase, i); - Vector b = Unsafe.Add(ref bBase, i); - r *= scale; - g *= scale; - b *= scale; - - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref rr, ref gg, ref bb); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs new file mode 100644 index 000000000..763064d1e --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs @@ -0,0 +1,67 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromRgbVector8 : Vector8JpegColorConverter + { + public FromRgbVector8(int precision) + : base(JpegColorSpace.RGB, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / this.MaximumValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector r = Unsafe.Add(ref rBase, i); + Vector g = Unsafe.Add(ref gBase, i); + Vector b = Unsafe.Add(ref bBase, i); + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromRgbBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs new file mode 100644 index 000000000..f3a063620 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs @@ -0,0 +1,101 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +// ReSharper disable ImpureMethodCallOnReadonlyValueField +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYCbCrAvx2 : Avx2JpegColorConverter + { + public FromYCbCrAvx2(int precision) + : base(JpegColorSpace.YCbCr, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + #if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-this.HalfValue); + var scale = Vector256.Create(1 / this.MaximumValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + // TODO: We should be saving to RGBA not Vector4 + r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); + g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); + b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs index 31fc05461..352e4acb7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYCbCrBasic : JpegColorConverter + internal sealed class FromYCbCrBasic : BasicJpegColorConverter { public FromYCbCrBasic(int precision) : base(JpegColorSpace.YCbCr, precision) @@ -48,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs deleted file mode 100644 index 05258fb9b..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -// ReSharper disable ImpureMethodCallOnReadonlyValueField -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromYCbCrSimdVector8 : JpegColorConverter - { - public FromYCbCrSimdVector8(int precision) - : base(JpegColorSpace.YCbCr, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); - } - - FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); - } - - /// - /// SIMD convert using buffers of sizes divisible by 8. - /// - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var chromaOffset = Vector256.Create(-halfValue); - var scale = Vector256.Create(1 / maxValue); - var rCrMult = Vector256.Create(1.402F); - var gCbMult = Vector256.Create(-0.344136F); - var gCrMult = Vector256.Create(-0.714136F); - var bCbMult = Vector256.Create(1.772F); - - // Used for packing. - var va = Vector256.Create(1F); - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - Vector256 y = Unsafe.Add(ref yBase, i); - Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); - Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); - - y = Avx2.PermuteVar8x32(y, vcontrol); - cb = Avx2.PermuteVar8x32(cb, vcontrol); - cr = Avx2.PermuteVar8x32(cr, vcontrol); - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); - Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); - Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); - - // TODO: We should be saving to RGBA not Vector4 - r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); - g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); - b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); - - Vector256 vte = Avx.UnpackLow(r, b); - Vector256 vto = Avx.UnpackLow(g, va); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); - - vte = Avx.UnpackHigh(r, b); - vto = Avx.UnpackHigh(g, va); - - Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); - } -#else - ref Vector yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - var chromaOffset = new Vector(-halfValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - - ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); - - var scale = new Vector(1 / maxValue); - - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - Vector y = Unsafe.Add(ref yBase, i); - Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; - Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector r = y + (cr * new Vector(1.402F)); - Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); - Vector b = y + (cb * new Vector(1.772F)); - - r = r.FastRound(); - g = g.FastRound(); - b = b.FastRound(); - r *= scale; - g *= scale; - b *= scale; - - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref rr, ref gg, ref bb); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs similarity index 78% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs index 541a03615..65a7c42d8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs @@ -5,36 +5,22 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; - using SixLabors.ImageSharp.Tuples; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYCbCrSimd : JpegColorConverter + internal sealed class FromYCbCrVector : VectorizedJpegColorConverter { - public FromYCbCrSimd(int precision) - : base(JpegColorSpace.YCbCr, precision) + public FromYCbCrVector(int precision) + : base(JpegColorSpace.YCbCr, precision, 8) { } - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); - } + protected override bool IsAvailable => true; - FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); - } - - /// - /// SIMD convert using buffers of sizes divisible by 8. - /// - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) { DebugGuard.IsTrue(result.Length % 8 == 0, nameof(result), "result.Length should be divisible by 8!"); @@ -48,7 +34,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref Vector4Octet resultBase = ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - var chromaOffset = new Vector4(-halfValue); + var chromaOffset = new Vector4(-this.HalfValue); + var maxValue = this.MaximumValue; // Walking 8 elements at one step: int n = result.Length / 8; @@ -112,6 +99,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters destination.Pack(ref r, ref g, ref b); } } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs new file mode 100644 index 000000000..abacf7161 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs @@ -0,0 +1,87 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +// ReSharper disable ImpureMethodCallOnReadonlyValueField +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYCbCrVector8 : Vector8JpegColorConverter + { + public FromYCbCrVector8(int precision) + : base(JpegColorSpace.YCbCr, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + var chromaOffset = new Vector(-this.HalfValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / this.MaximumValue); + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + Vector y = Unsafe.Add(ref yBase, i); + Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; + Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + r = r.FastRound(); + g = g.FastRound(); + b = b.FastRound(); + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs new file mode 100644 index 000000000..ea0132e1e --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs @@ -0,0 +1,110 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYccKAvx2 : Avx2JpegColorConverter + { + public FromYccKAvx2(int precision) + : base(JpegColorSpace.Ycck, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-this.HalfValue); + var scale = Vector256.Create(1 / this.MaximumValue); + var max = Vector256.Create(this.MaximumValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + Vector256 k = Avx.Divide(Unsafe.Add(ref kBase, i), max); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + k = Avx2.PermuteVar8x32(k, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = + HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + r = Avx.Subtract(max, Avx.RoundToNearestInteger(r)); + g = Avx.Subtract(max, Avx.RoundToNearestInteger(g)); + b = Avx.Subtract(max, Avx.RoundToNearestInteger(b)); + + r = Avx.Multiply(Avx.Multiply(r, k), scale); + g = Avx.Multiply(Avx.Multiply(g, k), scale); + b = Avx.Multiply(Avx.Multiply(b, k), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs index 100888956..778e5325f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYccKBasic : JpegColorConverter + internal sealed class FromYccKBasic : BasicJpegColorConverter { public FromYccKBasic(int precision) : base(JpegColorSpace.Ycck, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs deleted file mode 100644 index 8645fb8fa..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromYccKVector8 : JpegColorConverter - { - public FromYccKVector8(int precision) - : base(JpegColorSpace.Ycck, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); - } - - FromYccKBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector256 kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var chromaOffset = Vector256.Create(-halfValue); - var scale = Vector256.Create(1 / maxValue); - var max = Vector256.Create(maxValue); - var rCrMult = Vector256.Create(1.402F); - var gCbMult = Vector256.Create(-0.344136F); - var gCrMult = Vector256.Create(-0.714136F); - var bCbMult = Vector256.Create(1.772F); - - // Used for packing. - var va = Vector256.Create(1F); - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - // k = kVals[i] / 256F; - Vector256 y = Unsafe.Add(ref yBase, i); - Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); - Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); - Vector256 k = Avx.Divide(Unsafe.Add(ref kBase, i), max); - - y = Avx2.PermuteVar8x32(y, vcontrol); - cb = Avx2.PermuteVar8x32(cb, vcontrol); - cr = Avx2.PermuteVar8x32(cr, vcontrol); - k = Avx2.PermuteVar8x32(k, vcontrol); - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); - Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); - Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); - - r = Avx.Subtract(max, Avx.RoundToNearestInteger(r)); - g = Avx.Subtract(max, Avx.RoundToNearestInteger(g)); - b = Avx.Subtract(max, Avx.RoundToNearestInteger(b)); - - r = Avx.Multiply(Avx.Multiply(r, k), scale); - g = Avx.Multiply(Avx.Multiply(g, k), scale); - b = Avx.Multiply(Avx.Multiply(b, k), scale); - - Vector256 vte = Avx.UnpackLow(r, b); - Vector256 vto = Avx.UnpackLow(g, va); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); - - vte = Avx.UnpackHigh(r, b); - vto = Avx.UnpackHigh(g, va); - - Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); - } -#else - ref Vector yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - var chromaOffset = new Vector(-halfValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - - ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); - - var scale = new Vector(1 / maxValue); - var max = new Vector(maxValue); - - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - // k = kVals[i] / 256F; - Vector y = Unsafe.Add(ref yBase, i); - Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; - Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; - Vector k = Unsafe.Add(ref kBase, i) / max; - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector r = y + (cr * new Vector(1.402F)); - Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); - Vector b = y + (cb * new Vector(1.772F)); - - r = (max - r.FastRound()) * k; - g = (max - g.FastRound()) * k; - b = (max - b.FastRound()) * k; - r *= scale; - g *= scale; - b *= scale; - - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref rr, ref gg, ref bb); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs new file mode 100644 index 000000000..c360392de --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs @@ -0,0 +1,91 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYccKVector8 : Vector8JpegColorConverter + { + public FromYccKVector8(int precision) + : base(JpegColorSpace.Ycck, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + var chromaOffset = new Vector(-this.HalfValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / this.MaximumValue); + var max = new Vector(this.MaximumValue); + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector y = Unsafe.Add(ref yBase, i); + Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; + Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + Vector k = Unsafe.Add(ref kBase, i) / max; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + r = (max - r.FastRound()) * k; + g = (max - g.FastRound()) * k; + b = (max - b.FastRound()) * k; + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs new file mode 100644 index 000000000..3e9b889db --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs @@ -0,0 +1,18 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class Vector8JpegColorConverter : VectorizedJpegColorConverter + { + protected Vector8JpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision, 8) + { + } + + protected sealed override bool IsAvailable => SimdUtils.HasVector8; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs new file mode 100644 index 000000000..522be82c2 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs @@ -0,0 +1,46 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class VectorizedJpegColorConverter : JpegColorConverter + { + private readonly int vectorSize; + + protected VectorizedJpegColorConverter(JpegColorSpace colorSpace, int precision, int vectorSize) + : base(colorSpace, precision) + { + this.vectorSize = vectorSize; + } + + public sealed override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % this.vectorSize; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!this.IsAvailable) + { + throw new InvalidOperationException( + "This converter can be used only on architecture having 256 byte floating point SIMD registers!"); + } + + this.ConvertCoreVectorized(values.Slice(0, simdCount), result.Slice(0, simdCount)); + } + + this.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder)); + } + + protected abstract void ConvertCoreVectorized(in ComponentValues values, Span result); + + protected abstract void ConvertCore(in ComponentValues values, Span result); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index c49ec7e38..7b3c11938 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Numerics; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Tuples; @@ -17,22 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// /// The available converters /// - private static readonly JpegColorConverter[] Converters = - { - // 8-bit converters - GetYCbCrConverter(8), - GetYccKConverter(8), - GetCmykConverter(8), - GetGrayScaleConverter(8), - GetRgbConverter(8), - - // 12-bit converters - GetYCbCrConverter(12), - GetYccKConverter(12), - GetCmykConverter(12), - GetGrayScaleConverter(12), - GetRgbConverter(12), - }; + private static readonly JpegColorConverter[] Converters = CreateConverters(); /// /// Initializes a new instance of the class. @@ -45,6 +31,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters this.HalfValue = MathF.Ceiling(this.MaximumValue / 2); } + /// + /// Gets a value indicating whether this is available + /// on the current runtime and CPU architecture. + /// + protected abstract bool IsAvailable { get; } + /// /// Gets the of this converter. /// @@ -89,34 +81,79 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters public abstract void ConvertToRgba(in ComponentValues values, Span result); /// - /// Returns the for the YCbCr colorspace that matches the current CPU architecture. + /// Returns the s for all supported colorspaces and precisions. /// - private static JpegColorConverter GetYCbCrConverter(int precision) => - FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision); + private static JpegColorConverter[] CreateConverters() + { + var converters = new List(); + + // 8-bit converters + converters.AddRange(GetYCbCrConverters(8)); + converters.AddRange(GetYccKConverters(8)); + converters.AddRange(GetCmykConverters(8)); + converters.AddRange(GetGrayScaleConverters(8)); + converters.AddRange(GetRgbConverters(8)); + + // 8-bit converters + converters.AddRange(GetYCbCrConverters(12)); + converters.AddRange(GetYccKConverters(12)); + converters.AddRange(GetCmykConverters(12)); + converters.AddRange(GetGrayScaleConverters(12)); + converters.AddRange(GetRgbConverters(12)); + + return converters.Where(x => x.IsAvailable).ToArray(); + } /// - /// Returns the for the YccK colorspace that matches the current CPU architecture. + /// Returns the s for the YCbCr colorspace. /// - private static JpegColorConverter GetYccKConverter(int precision) => - FromYccKVector8.IsAvailable ? (JpegColorConverter)new FromYccKVector8(precision) : new FromYccKBasic(precision); + private static IEnumerable GetYCbCrConverters(int precision) + { + yield return new FromYCbCrAvx2(precision); + yield return new FromYCbCrVector8(precision); + yield return new FromYCbCrVector(precision); + yield return new FromYCbCrBasic(precision); + } /// - /// Returns the for the CMYK colorspace that matches the current CPU architecture. + /// Returns the s for the YccK colorspace. /// - private static JpegColorConverter GetCmykConverter(int precision) => - FromCmykVector8.IsAvailable ? (JpegColorConverter)new FromCmykVector8(precision) : new FromCmykBasic(precision); + private static IEnumerable GetYccKConverters(int precision) + { + yield return new FromYccKAvx2(precision); + yield return new FromYccKVector8(precision); + yield return new FromYccKBasic(precision); + } /// - /// Returns the for the gray scale colorspace that matches the current CPU architecture. + /// Returns the s for the CMYK colorspace. /// - private static JpegColorConverter GetGrayScaleConverter(int precision) => - FromGrayscaleVector8.IsAvailable ? (JpegColorConverter)new FromGrayscaleVector8(precision) : new FromGrayscaleBasic(precision); + private static IEnumerable GetCmykConverters(int precision) + { + yield return new FromCmykAvx2(precision); + yield return new FromCmykVector8(precision); + yield return new FromCmykBasic(precision); + } /// - /// Returns the for the RGB colorspace that matches the current CPU architecture. + /// Returns the s for the gray scale colorspace. /// - private static JpegColorConverter GetRgbConverter(int precision) => - FromRgbVector8.IsAvailable ? (JpegColorConverter)new FromRgbVector8(precision) : new FromRgbBasic(precision); + private static IEnumerable GetGrayScaleConverters(int precision) + { + yield return new FromGrayscaleAvx2(precision); + yield return new FromGrayscaleVector8(precision); + yield return new FromGrayscaleBasic(precision); + } + + /// + /// Returns the s for the RGB colorspace. + /// + private static IEnumerable GetRgbConverters(int precision) + { + yield return new FromRgbAvx2(precision); + yield return new FromRgbVector8(precision); + yield return new FromRgbBasic(precision); + } /// /// A stack-only struct to reference the input buffers using -s. diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs index 7b47cf94a..9b4b4568f 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs @@ -41,7 +41,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrBasic.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrBasic(8).ConvertToRgba(values, this.output); } [Benchmark(Baseline = true)] @@ -49,7 +49,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrSimd.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrVector(8).ConvertToRgba(values, this.output); } [Benchmark] @@ -57,7 +57,15 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrSimdVector8.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYCbCrAvx2(8).ConvertToRgba(values, this.output); } private static Buffer2D[] CreateRandomValues( diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 9abe31884..50513b289 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -22,6 +22,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg private static readonly ApproximateColorSpaceComparer ColorSpaceComparer = new ApproximateColorSpaceComparer(Precision); + // int inputBufferLength, int resultBufferLength, int seed public static readonly TheoryData CommonConversionData = new TheoryData { @@ -51,44 +52,30 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg seed); } - private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromYCbCrVector(int inputBufferLength, int resultBufferLength, int seed) { - float y = values.Component0[i]; - float cb = values.Component1[i]; - float cr = values.Component2[i]; - var ycbcr = new YCbCr(y, cb, cr); - - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = ColorSpaceConverter.ToRgb(ycbcr); - - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); + ValidateRgbToYCbCrConversion( + new JpegColorConverter.FromYCbCrVector(8), + 3, + inputBufferLength, + resultBufferLength, + seed); } [Theory] - [InlineData(64, 1)] - [InlineData(16, 2)] - [InlineData(8, 3)] - public void FromYCbCrSimd_ConvertCore(int size, int seed) + [MemberData(nameof(CommonConversionData))] + public void FromYCbCrVector8(int inputBufferLength, int resultBufferLength, int seed) { - JpegColorConverter.ComponentValues values = CreateRandomValues(3, size, seed); - var result = new Vector4[size]; - - JpegColorConverter.FromYCbCrSimd.ConvertCore(values, result, 255, 128); - - for (int i = 0; i < size; i++) + if (!SimdUtils.HasVector8) { - ValidateYCbCr(values, result, i); + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } - } - [Theory] - [MemberData(nameof(CommonConversionData))] - public void FromYCbCrSimd(int inputBufferLength, int resultBufferLength, int seed) - { ValidateRgbToYCbCrConversion( - new JpegColorConverter.FromYCbCrSimd(8), + new JpegColorConverter.FromYCbCrVector8(8), 3, inputBufferLength, resultBufferLength, @@ -97,9 +84,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [MemberData(nameof(CommonConversionData))] - public void FromYCbCrSimdAvx2(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCrAvx2(int inputBufferLength, int resultBufferLength, int seed) { - if (!SimdUtils.HasVector8) + if (!SimdUtils.HasAvx2) { this.Output.WriteLine("No AVX2 present, skipping test!"); return; @@ -107,7 +94,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg // JpegColorConverter.FromYCbCrSimdAvx2.LogPlz = s => this.Output.WriteLine(s); ValidateRgbToYCbCrConversion( - new JpegColorConverter.FromYCbCrSimdVector8(8), + new JpegColorConverter.FromYCbCrAvx2(8), 3, inputBufferLength, resultBufferLength, @@ -138,7 +125,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); var result = new Vector4[count]; - JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrSimd(8) : new JpegColorConverter.FromYCbCrBasic(8); + JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector(8) : new JpegColorConverter.FromYCbCrBasic(8); // Warm up: converter.ConvertToRgba(values, result); @@ -331,5 +318,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ValidateYCbCr(values, result, i); } } + + private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + float y = values.Component0[i]; + float cb = values.Component1[i]; + float cr = values.Component2[i]; + var ycbcr = new YCbCr(y, cb, cr); + + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = ColorSpaceConverter.ToRgb(ycbcr); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } } }