diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 17ccb396d6..da93e1500c 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -616,7 +616,12 @@ internal static partial class SimdUtils return Fma.MultiplyAdd(vm1, vm0, va); } - return Avx.Add(Avx.Multiply(vm0, vm1), va); + if (Avx.IsSupported) + { + return Avx.Add(Avx.Multiply(vm0, vm1), va); + } + + return va + (vm0 * vm1); } /// @@ -644,7 +649,12 @@ internal static partial class SimdUtils return AdvSimd.Add(AdvSimd.Multiply(vm0, vm1), va); } - return Sse.Add(Sse.Multiply(vm0, vm1), va); + if (Sse.IsSupported) + { + return Sse.Add(Sse.Multiply(vm0, vm1), va); + } + + return va + (vm0 * vm1); } /// diff --git a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs index b6dd319f06..8f4ea6292c 100644 --- a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs @@ -193,13 +193,70 @@ internal static class Vector128Utilities return AdvSimd.ConvertToInt32RoundToEven(vector); } - Vector128 sign = vector & Vector128.Create(-0.0f); - Vector128 val_2p23_f32 = sign | Vector128.Create(8388608.0f); + Vector128 sign = vector & Vector128.Create(-0F); + Vector128 val_2p23_f32 = sign | Vector128.Create(8388608F); val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32; return Vector128.ConvertToInt32(val_2p23_f32 | sign); } + /// + /// Rounds all values in to the nearest integer + /// following semantics. 
+ /// + /// The vector + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 RoundToNearestInteger(Vector128 vector) + { + if (Sse41.IsSupported) + { + return Sse41.RoundToNearestInteger(vector); + } + + if (AdvSimd.IsSupported) + { + return AdvSimd.RoundToNearest(vector); + } + + Vector128 sign = vector & Vector128.Create(-0F); + Vector128 val_2p23_f32 = sign | Vector128.Create(8388608F); + + val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32; + return val_2p23_f32 | sign; + } + + /// + /// Performs a multiplication and an addition of the . + /// + /// ret = (vm0 * vm1) + va + /// The vector to add to the intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 MultiplyAdd( + Vector128 va, + Vector128 vm0, + Vector128 vm1) + { + if (Fma.IsSupported) + { + return Fma.MultiplyAdd(vm1, vm0, va); + } + + if (AdvSimd.IsSupported) + { + return AdvSimd.Add(AdvSimd.Multiply(vm0, vm1), va); + } + + if (Sse.IsSupported) + { + return Sse.Add(Sse.Multiply(vm0, vm1), va); + } + + return va + (vm0 * vm1); + } + /// /// Packs signed 16-bit integers to unsigned 8-bit integers and saturates. 
/// diff --git a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs index 6e8c0d1de4..a7366f6dae 100644 --- a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs @@ -103,13 +103,60 @@ internal static class Vector256Utilities return Vector256.Create(lower, upper); } - Vector256 sign = vector & Vector256.Create(-0.0f); - Vector256 val_2p23_f32 = sign | Vector256.Create(8388608.0f); + Vector256 sign = vector & Vector256.Create(-0F); + Vector256 val_2p23_f32 = sign | Vector256.Create(8388608F); val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32; return Vector256.ConvertToInt32(val_2p23_f32 | sign); } + /// + /// Rounds all values in to the nearest integer + /// following semantics. + /// + /// The vector + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 RoundToNearestInteger(Vector256 vector) + { + if (Avx.IsSupported) + { + return Avx.RoundToNearestInteger(vector); + } + + Vector256 sign = vector & Vector256.Create(-0F); + Vector256 val_2p23_f32 = sign | Vector256.Create(8388608F); + + val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32; + return val_2p23_f32 | sign; + } + + /// + /// Performs a multiplication and an addition of the . + /// + /// ret = (vm0 * vm1) + va + /// The vector to add to the intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 MultiplyAdd( + Vector256 va, + Vector256 vm0, + Vector256 vm1) + { + if (Fma.IsSupported) + { + return Fma.MultiplyAdd(vm1, vm0, va); + } + + if (Avx.IsSupported) + { + return Avx.Add(Avx.Multiply(vm0, vm1), va); + } + + return va + (vm0 * vm1); + } + [DoesNotReturn] private static void ThrowUnreachableException() => throw new UnreachableException(); } diff --git a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs index 0165af90ef..77dc944756 100644 --- a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs @@ -110,6 +110,43 @@ internal static class Vector512Utilities return Vector512.ConvertToInt32(val_2p23_f32 | sign); } + /// + /// Rounds all values in to the nearest integer + /// following semantics. + /// + /// The vector + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 RoundToNearestInteger(Vector512 vector) + { + if (Avx512F.IsSupported) + { + // imm8 = 0b1000: + // imm8[7:4] = 0b0000 -> preserve 0 fractional bits (round to whole numbers) + // imm8[3:0] = 0b1000 -> _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC (round to nearest even, suppress exceptions) + return Avx512F.RoundScale(vector, 0b0000_1000); + } + + Vector512 sign = vector & Vector512.Create(-0F); + Vector512 val_2p23_f32 = sign | Vector512.Create(8388608F); + + val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32; + return val_2p23_f32 | sign; + } + + /// + /// Performs a multiplication and an addition of the . + /// + /// ret = (vm0 * vm1) + va + /// The vector to add to the intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyAdd( + Vector512 va, + Vector512 vm0, + Vector512 vm1) => va + (vm0 * vm1); + [DoesNotReturn] private static void ThrowUnreachableException() => throw new UnreachableException(); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykVector.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykVector.cs index a59be009b7..84b0b16d09 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykVector.cs @@ -17,7 +17,7 @@ internal abstract partial class JpegColorConverterBase } /// - protected override void ConvertToRgbInplaceVectorized(in ComponentValues values) + protected override void ConvertToRgbInPlaceVectorized(in ComponentValues values) { ref Vector cBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); @@ -46,7 +46,7 @@ internal abstract partial class JpegColorConverterBase } /// - protected override void ConvertToRgbInplaceScalarRemainder(in ComponentValues values) + protected override void ConvertToRgbInPlaceScalarRemainder(in ComponentValues values) => CmykScalar.ConvertToRgbInplace(values, this.MaximumValue); /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleVector.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleVector.cs index cac10636f5..b6283084f9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleVector.cs @@ -17,7 +17,7 @@ internal abstract partial class JpegColorConverterBase } /// - protected override void ConvertToRgbInplaceVectorized(in ComponentValues values) + protected override void ConvertToRgbInPlaceVectorized(in 
ComponentValues values) { ref Vector cBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); @@ -33,7 +33,7 @@ internal abstract partial class JpegColorConverterBase } /// - protected override void ConvertToRgbInplaceScalarRemainder(in ComponentValues values) + protected override void ConvertToRgbInPlaceScalarRemainder(in ComponentValues values) => GrayscaleScalar.ConvertToRgbInplace(values.Component0, this.MaximumValue); /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbVector.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbVector.cs index bd3142fa13..5fabc80c78 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbVector.cs @@ -17,7 +17,7 @@ internal abstract partial class JpegColorConverterBase } /// - protected override void ConvertToRgbInplaceVectorized(in ComponentValues values) + protected override void ConvertToRgbInPlaceVectorized(in ComponentValues values) { ref Vector rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); @@ -41,7 +41,7 @@ internal abstract partial class JpegColorConverterBase } /// - protected override void ConvertToRgbInplaceScalarRemainder(in ComponentValues values) + protected override void ConvertToRgbInPlaceScalarRemainder(in ComponentValues values) => RgbScalar.ConvertToRgbInplace(values, this.MaximumValue); /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector.cs index e3700b2ca6..92ff71b101 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector.cs @@ -18,7 +18,7 @@ internal abstract partial class 
JpegColorConverterBase } /// - protected override void ConvertToRgbInplaceVectorized(in ComponentValues values) + protected override void ConvertToRgbInPlaceVectorized(in ComponentValues values) { ref Vector c0Base = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); @@ -69,7 +69,7 @@ internal abstract partial class JpegColorConverterBase } /// - protected override void ConvertToRgbInplaceScalarRemainder(in ComponentValues values) + protected override void ConvertToRgbInPlaceScalarRemainder(in ComponentValues values) => YCbCrScalar.ConvertToRgbInplace(values, this.MaximumValue, this.HalfValue); /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector128.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector128.cs new file mode 100644 index 0000000000..95e3167cc4 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector128.cs @@ -0,0 +1,116 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. 
+ +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using SixLabors.ImageSharp.Common.Helpers; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components; + +internal abstract partial class JpegColorConverterBase +{ + internal sealed class YCbCrVector128 : JpegColorConverterVector128 + { + public YCbCrVector128(int precision) + : base(JpegColorSpace.YCbCr, precision) + { + } + + /// + public override void ConvertToRgbInPlace(in ComponentValues values) + { + ref Vector128 c0Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector128 c1Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector128 c2Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + Vector128 chromaOffset = Vector128.Create(-this.HalfValue); + Vector128 scale = Vector128.Create(1 / this.MaximumValue); + Vector128 rCrMult = Vector128.Create(YCbCrScalar.RCrMult); + Vector128 gCbMult = Vector128.Create(-YCbCrScalar.GCbMult); + Vector128 gCrMult = Vector128.Create(-YCbCrScalar.GCrMult); + Vector128 bCbMult = Vector128.Create(YCbCrScalar.BCbMult); + + // Walking 4 elements at one step: + nuint n = values.Component0.Vector128Count(); + for (nuint i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + ref Vector128 c0 = ref Unsafe.Add(ref c0Base, i); + ref Vector128 c1 = ref Unsafe.Add(ref c1Base, i); + ref Vector128 c2 = ref Unsafe.Add(ref c2Base, i); + + Vector128 y = c0; + Vector128 cb = c1 + chromaOffset; + Vector128 cr = c2 + chromaOffset; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + Vector128 r = Vector128Utilities.MultiplyAdd(y, cr, rCrMult); + Vector128 g = Vector128Utilities.MultiplyAdd(Vector128Utilities.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector128 b = Vector128Utilities.MultiplyAdd(y, cb, bCbMult); + + r = 
Vector128Utilities.RoundToNearestInteger(r) * scale; + g = Vector128Utilities.RoundToNearestInteger(g) * scale; + b = Vector128Utilities.RoundToNearestInteger(b) * scale; + + c0 = r; + c1 = g; + c2 = b; + } + } + + /// + public override void ConvertFromRgb(in ComponentValues values, Span rLane, Span gLane, Span bLane) + { + ref Vector128 destY = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector128 destCb = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector128 destCr = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector128 srcR = + ref Unsafe.As>(ref MemoryMarshal.GetReference(rLane)); + ref Vector128 srcG = + ref Unsafe.As>(ref MemoryMarshal.GetReference(gLane)); + ref Vector128 srcB = + ref Unsafe.As>(ref MemoryMarshal.GetReference(bLane)); + + Vector128 chromaOffset = Vector128.Create(this.HalfValue); + Vector128 f0299 = Vector128.Create(0.299f); + Vector128 f0587 = Vector128.Create(0.587f); + Vector128 f0114 = Vector128.Create(0.114f); + Vector128 fn0168736 = Vector128.Create(-0.168736f); + Vector128 fn0331264 = Vector128.Create(-0.331264f); + Vector128 fn0418688 = Vector128.Create(-0.418688f); + Vector128 fn0081312F = Vector128.Create(-0.081312F); + Vector128 f05 = Vector128.Create(0.5f); + + nuint n = values.Component0.Vector128Count(); + for (nuint i = 0; i < n; i++) + { + Vector128 r = Unsafe.Add(ref srcR, i); + Vector128 g = Unsafe.Add(ref srcG, i); + Vector128 b = Unsafe.Add(ref srcB, i); + + // y = 0 + (0.299 * r) + (0.587 * g) + (0.114 * b) + // cb = 128 - (0.168736 * r) - (0.331264 * g) + (0.5 * b) + // cr = 128 + (0.5 * r) - (0.418688 * g) - (0.081312 * b) + Vector128 y = Vector128Utilities.MultiplyAdd(Vector128Utilities.MultiplyAdd(f0114 * b, f0587, g), f0299, r); + Vector128 cb = chromaOffset + Vector128Utilities.MultiplyAdd(Vector128Utilities.MultiplyAdd(f05 * b, fn0331264, g), fn0168736, r); + Vector128 cr = chromaOffset + 
Vector128Utilities.MultiplyAdd(Vector128Utilities.MultiplyAdd(fn0081312F * b, fn0418688, g), f05, r); + + Unsafe.Add(ref destY, i) = y; + Unsafe.Add(ref destCb, i) = cb; + Unsafe.Add(ref destCr, i) = cr; + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector256.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector256.cs new file mode 100644 index 0000000000..8d8e234685 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector256.cs @@ -0,0 +1,116 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using SixLabors.ImageSharp.Common.Helpers; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components; + +internal abstract partial class JpegColorConverterBase +{ + internal sealed class YCbCrVector256 : JpegColorConverterVector256 + { + public YCbCrVector256(int precision) + : base(JpegColorSpace.YCbCr, precision) + { + } + + /// + public override void ConvertToRgbInPlace(in ComponentValues values) + { + ref Vector256 c0Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 c1Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 c2Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + Vector256 chromaOffset = Vector256.Create(-this.HalfValue); + Vector256 scale = Vector256.Create(1 / this.MaximumValue); + Vector256 rCrMult = Vector256.Create(YCbCrScalar.RCrMult); + Vector256 gCbMult = Vector256.Create(-YCbCrScalar.GCbMult); + Vector256 gCrMult = Vector256.Create(-YCbCrScalar.GCrMult); + Vector256 bCbMult = Vector256.Create(YCbCrScalar.BCbMult); + + // Walking 8 elements at one step: + nuint n = values.Component0.Vector256Count(); + for (nuint i = 0; i < n; i++) + { + // y = yVals[i]; + // 
cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + ref Vector256 c0 = ref Unsafe.Add(ref c0Base, i); + ref Vector256 c1 = ref Unsafe.Add(ref c1Base, i); + ref Vector256 c2 = ref Unsafe.Add(ref c2Base, i); + + Vector256 y = c0; + Vector256 cb = c1 + chromaOffset; + Vector256 cr = c2 + chromaOffset; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + Vector256 r = Vector256Utilities.MultiplyAdd(y, cr, rCrMult); + Vector256 g = Vector256Utilities.MultiplyAdd(Vector256Utilities.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = Vector256Utilities.MultiplyAdd(y, cb, bCbMult); + + r = Vector256Utilities.RoundToNearestInteger(r) * scale; + g = Vector256Utilities.RoundToNearestInteger(g) * scale; + b = Vector256Utilities.RoundToNearestInteger(b) * scale; + + c0 = r; + c1 = g; + c2 = b; + } + } + + /// + public override void ConvertFromRgb(in ComponentValues values, Span rLane, Span gLane, Span bLane) + { + ref Vector256 destY = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 destCb = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 destCr = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 srcR = + ref Unsafe.As>(ref MemoryMarshal.GetReference(rLane)); + ref Vector256 srcG = + ref Unsafe.As>(ref MemoryMarshal.GetReference(gLane)); + ref Vector256 srcB = + ref Unsafe.As>(ref MemoryMarshal.GetReference(bLane)); + + Vector256 chromaOffset = Vector256.Create(this.HalfValue); + Vector256 f0299 = Vector256.Create(0.299f); + Vector256 f0587 = Vector256.Create(0.587f); + Vector256 f0114 = Vector256.Create(0.114f); + Vector256 fn0168736 = Vector256.Create(-0.168736f); + Vector256 fn0331264 = Vector256.Create(-0.331264f); + Vector256 fn0418688 = Vector256.Create(-0.418688f); + Vector256 fn0081312F = Vector256.Create(-0.081312F); + Vector256 f05 = Vector256.Create(0.5f); + + nuint n = 
values.Component0.Vector256Count(); + for (nuint i = 0; i < n; i++) + { + Vector256 r = Unsafe.Add(ref srcR, i); + Vector256 g = Unsafe.Add(ref srcG, i); + Vector256 b = Unsafe.Add(ref srcB, i); + + // y = 0 + (0.299 * r) + (0.587 * g) + (0.114 * b) + // cb = 128 - (0.168736 * r) - (0.331264 * g) + (0.5 * b) + // cr = 128 + (0.5 * r) - (0.418688 * g) - (0.081312 * b) + Vector256 y = Vector256Utilities.MultiplyAdd(Vector256Utilities.MultiplyAdd(f0114 * b, f0587, g), f0299, r); + Vector256 cb = chromaOffset + Vector256Utilities.MultiplyAdd(Vector256Utilities.MultiplyAdd(f05 * b, fn0331264, g), fn0168736, r); + Vector256 cr = chromaOffset + Vector256Utilities.MultiplyAdd(Vector256Utilities.MultiplyAdd(fn0081312F * b, fn0418688, g), f05, r); + + Unsafe.Add(ref destY, i) = y; + Unsafe.Add(ref destCb, i) = cb; + Unsafe.Add(ref destCr, i) = cr; + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector512.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector512.cs new file mode 100644 index 0000000000..79ad03512a --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector512.cs @@ -0,0 +1,123 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. 
+ +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using SixLabors.ImageSharp.Common.Helpers; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components; + +internal abstract partial class JpegColorConverterBase +{ + internal sealed class YCbCrVector512 : JpegColorConverterVector512 + { + public YCbCrVector512(int precision) + : base(JpegColorSpace.YCbCr, precision) + { + } + + /// + protected override void ConvertToRgbInPlaceVectorized(in ComponentValues values) + { + ref Vector512 c0Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector512 c1Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector512 c2Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + Vector512 chromaOffset = Vector512.Create(-this.HalfValue); + Vector512 scale = Vector512.Create(1 / this.MaximumValue); + Vector512 rCrMult = Vector512.Create(YCbCrScalar.RCrMult); + Vector512 gCbMult = Vector512.Create(-YCbCrScalar.GCbMult); + Vector512 gCrMult = Vector512.Create(-YCbCrScalar.GCrMult); + Vector512 bCbMult = Vector512.Create(YCbCrScalar.BCbMult); + + nuint n = values.Component0.Vector512Count(); + for (nuint i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + ref Vector512 c0 = ref Unsafe.Add(ref c0Base, i); + ref Vector512 c1 = ref Unsafe.Add(ref c1Base, i); + ref Vector512 c2 = ref Unsafe.Add(ref c2Base, i); + + Vector512 y = c0; + Vector512 cb = c1 + chromaOffset; + Vector512 cr = c2 + chromaOffset; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + Vector512 r = Vector512Utilities.MultiplyAdd(y, cr, rCrMult); + Vector512 g = Vector512Utilities.MultiplyAdd(Vector512Utilities.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector512 b = Vector512Utilities.MultiplyAdd(y, cb, bCbMult); + + r = Vector512Utilities.RoundToNearestInteger(r) * 
scale; + g = Vector512Utilities.RoundToNearestInteger(g) * scale; + b = Vector512Utilities.RoundToNearestInteger(b) * scale; + + c0 = r; + c1 = g; + c2 = b; + } + } + + /// + protected override void ConvertFromRgbVectorized(in ComponentValues values, Span rLane, Span gLane, Span bLane) + { + ref Vector512 destY = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector512 destCb = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector512 destCr = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector512 srcR = + ref Unsafe.As>(ref MemoryMarshal.GetReference(rLane)); + ref Vector512 srcG = + ref Unsafe.As>(ref MemoryMarshal.GetReference(gLane)); + ref Vector512 srcB = + ref Unsafe.As>(ref MemoryMarshal.GetReference(bLane)); + + Vector512 chromaOffset = Vector512.Create(this.HalfValue); + Vector512 f0299 = Vector512.Create(0.299f); + Vector512 f0587 = Vector512.Create(0.587f); + Vector512 f0114 = Vector512.Create(0.114f); + Vector512 fn0168736 = Vector512.Create(-0.168736f); + Vector512 fn0331264 = Vector512.Create(-0.331264f); + Vector512 fn0418688 = Vector512.Create(-0.418688f); + Vector512 fn0081312F = Vector512.Create(-0.081312F); + Vector512 f05 = Vector512.Create(0.5f); + + nuint n = values.Component0.Vector512Count(); + for (nuint i = 0; i < n; i++) + { + Vector512 r = Unsafe.Add(ref srcR, i); + Vector512 g = Unsafe.Add(ref srcG, i); + Vector512 b = Unsafe.Add(ref srcB, i); + + // y = 0 + (0.299 * r) + (0.587 * g) + (0.114 * b) + // cb = 128 - (0.168736 * r) - (0.331264 * g) + (0.5 * b) + // cr = 128 + (0.5 * r) - (0.418688 * g) - (0.081312 * b) + Vector512 y = Vector512Utilities.MultiplyAdd(Vector512Utilities.MultiplyAdd(f0114 * b, f0587, g), f0299, r); + Vector512 cb = chromaOffset + Vector512Utilities.MultiplyAdd(Vector512Utilities.MultiplyAdd(f05 * b, fn0331264, g), fn0168736, r); + Vector512 cr = chromaOffset + 
Vector512Utilities.MultiplyAdd(Vector512Utilities.MultiplyAdd(fn0081312F * b, fn0418688, g), f05, r); + + Unsafe.Add(ref destY, i) = y; + Unsafe.Add(ref destCb, i) = cb; + Unsafe.Add(ref destCr, i) = cr; + } + } + + /// + protected override void ConvertToRgbInPlaceScalarRemainder(in ComponentValues values) + => YCbCrScalar.ConvertToRgbInplace(values, this.MaximumValue, this.HalfValue); + + /// + protected override void ConvertFromRgbScalarRemainder(in ComponentValues values, Span rLane, Span gLane, Span bLane) + => YCbCrScalar.ConvertFromRgb(values, this.HalfValue, rLane, gLane, bLane); + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKVector.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKVector.cs index 535e5f1253..5d57d0e541 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKVector.cs @@ -17,7 +17,7 @@ internal abstract partial class JpegColorConverterBase } /// - protected override void ConvertToRgbInplaceVectorized(in ComponentValues values) + protected override void ConvertToRgbInPlaceVectorized(in ComponentValues values) { ref Vector c0Base = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); @@ -70,7 +70,7 @@ internal abstract partial class JpegColorConverterBase } /// - protected override void ConvertToRgbInplaceScalarRemainder(in ComponentValues values) + protected override void ConvertToRgbInPlaceScalarRemainder(in ComponentValues values) => YccKScalar.ConvertToRgpInplace(values, this.MaximumValue, this.HalfValue); /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterBase.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterBase.cs index 581919e72c..5681153a10 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterBase.cs +++ 
b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterBase.cs @@ -116,6 +116,21 @@ internal abstract partial class JpegColorConverterBase /// The precision in bits. private static JpegColorConverterBase GetYCbCrConverter(int precision) { + if (JpegColorConverterVector512.IsSupported) + { + return new YCbCrVector512(precision); + } + + if (JpegColorConverterVector256.IsSupported) + { + return new YCbCrVector256(precision); + } + + if (JpegColorConverterVector128.IsSupported) + { + return new YCbCrVector128(precision); + } + if (JpegColorConverterVector.IsSupported) { return new YCbCrVector(precision); diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector.cs index 1c37f5eef2..f3c3eb8db5 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector.cs @@ -46,7 +46,7 @@ internal abstract partial class JpegColorConverterBase int simdCount = length - remainder; if (simdCount > 0) { - this.ConvertToRgbInplaceVectorized(values.Slice(0, simdCount)); + this.ConvertToRgbInPlaceVectorized(values.Slice(0, simdCount)); } // Jpeg images width is always divisible by 8 without a remainder @@ -56,12 +56,12 @@ internal abstract partial class JpegColorConverterBase // remainder pixels if (remainder > 0) { - this.ConvertToRgbInplaceScalarRemainder(values.Slice(simdCount, remainder)); + this.ConvertToRgbInPlaceScalarRemainder(values.Slice(simdCount, remainder)); } } /// - public sealed override void ConvertFromRgb(in ComponentValues values, Span r, Span g, Span b) + public sealed override void ConvertFromRgb(in ComponentValues values, Span rLane, Span gLane, Span bLane) { DebugGuard.IsTrue(this.IsAvailable, $"{this.GetType().Name} converter is not supported on current hardware."); @@ -73,9 +73,9 @@ internal abstract partial 
class JpegColorConverterBase { this.ConvertFromRgbVectorized( values.Slice(0, simdCount), - r.Slice(0, simdCount), - g.Slice(0, simdCount), - b.Slice(0, simdCount)); + rLane[..simdCount], + gLane[..simdCount], + bLane[..simdCount]); } // Jpeg images width is always divisible by 8 without a remainder @@ -87,25 +87,25 @@ internal abstract partial class JpegColorConverterBase { this.ConvertFromRgbScalarRemainder( values.Slice(simdCount, remainder), - r.Slice(simdCount, remainder), - g.Slice(simdCount, remainder), - b.Slice(simdCount, remainder)); + rLane.Slice(simdCount, remainder), + gLane.Slice(simdCount, remainder), + bLane.Slice(simdCount, remainder)); } } /// /// Converts planar jpeg component values in - /// to RGB color space inplace using API. + /// to RGB color space in place using API. /// - /// The input/ouptut as a stack-only struct - protected abstract void ConvertToRgbInplaceVectorized(in ComponentValues values); + /// The input/output as a stack-only struct + protected abstract void ConvertToRgbInPlaceVectorized(in ComponentValues values); /// /// Converts remainder of the planar jpeg component values after - /// conversion in . + /// conversion in . /// - /// The input/ouptut as a stack-only struct - protected abstract void ConvertToRgbInplaceScalarRemainder(in ComponentValues values); + /// The input/output as a stack-only struct + protected abstract void ConvertToRgbInPlaceScalarRemainder(in ComponentValues values); /// /// Converts RGB lanes to jpeg component values using API. diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector128.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector128.cs new file mode 100644 index 0000000000..35458a9a00 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector128.cs @@ -0,0 +1,34 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. 
+ +using System.Runtime.Intrinsics; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components; + +internal abstract partial class JpegColorConverterBase +{ + /// + /// abstract base for implementations + /// based on instructions. + /// + /// + /// Converters of this family would expect input buffers lengths to be + /// divisible by 8 without a remainder. + /// This is guaranteed by real-life data as jpeg stores pixels via 8x8 blocks. + /// DO NOT pass test data of invalid size to these converters as they + /// potentially won't do a bound check and return a false positive result. + /// + internal abstract class JpegColorConverterVector128 : JpegColorConverterBase + { + protected JpegColorConverterVector128(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision) + { + } + + public static bool IsSupported => Vector128.IsHardwareAccelerated && Vector128.IsSupported; + + public sealed override bool IsAvailable => IsSupported; + + public sealed override int ElementsPerBatch => Vector128.Count; + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector256.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector256.cs new file mode 100644 index 0000000000..14442383ae --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector256.cs @@ -0,0 +1,34 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using System.Runtime.Intrinsics; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components; + +internal abstract partial class JpegColorConverterBase +{ + /// + /// abstract base for implementations + /// based on instructions. + /// + /// + /// Converters of this family would expect input buffers lengths to be + /// divisible by 8 without a remainder. + /// This is guaranteed by real-life data as jpeg stores pixels via 8x8 blocks. 
+ /// DO NOT pass test data of invalid size to these converters as they + /// potentially won't do a bound check and return a false positive result. + /// + internal abstract class JpegColorConverterVector256 : JpegColorConverterBase + { + protected JpegColorConverterVector256(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision) + { + } + + public static bool IsSupported => Vector256.IsHardwareAccelerated && Vector256.IsSupported; + + public sealed override bool IsAvailable => IsSupported; + + public sealed override int ElementsPerBatch => Vector256.Count; + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector512.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector512.cs new file mode 100644 index 0000000000..e916ea5aa1 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverterVector512.cs @@ -0,0 +1,111 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using System.Numerics; +using System.Runtime.Intrinsics; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components; + +internal abstract partial class JpegColorConverterBase +{ + /// + /// abstract base for implementations + /// based on instructions. 
+ /// + internal abstract class JpegColorConverterVector512 : JpegColorConverterBase + { + protected JpegColorConverterVector512(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision) + { + } + + public static bool IsSupported => Vector512.IsHardwareAccelerated && Vector512.IsSupported; + + /// + public override bool IsAvailable => IsSupported; + + /// + public override int ElementsPerBatch => Vector512.Count; + + /// + public sealed override void ConvertFromRgb(in ComponentValues values, Span rLane, Span gLane, Span bLane) + { + DebugGuard.IsTrue(this.IsAvailable, $"{this.GetType().Name} converter is not supported on current hardware."); + + int length = values.Component0.Length; + int remainder = (int)((uint)length % (uint)Vector512.Count); + + int simdCount = length - remainder; + if (simdCount > 0) + { + this.ConvertFromRgbVectorized( + values.Slice(0, simdCount), + rLane[..simdCount], + gLane[..simdCount], + bLane[..simdCount]); + } + + if (remainder > 0) + { + this.ConvertFromRgbScalarRemainder( + values.Slice(simdCount, remainder), + rLane.Slice(simdCount, remainder), + gLane.Slice(simdCount, remainder), + bLane.Slice(simdCount, remainder)); + } + } + + /// + public sealed override void ConvertToRgbInPlace(in ComponentValues values) + { + DebugGuard.IsTrue(this.IsAvailable, $"{this.GetType().Name} converter is not supported on current hardware."); + + int length = values.Component0.Length; + int remainder = (int)((uint)length % (uint)Vector512.Count); + + int simdCount = length - remainder; + if (simdCount > 0) + { + this.ConvertToRgbInPlaceVectorized(values.Slice(0, simdCount)); + } + + if (remainder > 0) + { + this.ConvertToRgbInPlaceScalarRemainder(values.Slice(simdCount, remainder)); + } + } + + /// + /// Converts planar jpeg component values in + /// to RGB color space in place using API. 
+ /// + /// The input/output as a stack-only struct + protected abstract void ConvertToRgbInPlaceVectorized(in ComponentValues values); + + /// + /// Converts remainder of the planar jpeg component values after + /// conversion in . + /// + /// The input/output as a stack-only struct + protected abstract void ConvertToRgbInPlaceScalarRemainder(in ComponentValues values); + + /// + /// Converts RGB lanes to jpeg component values using API. + /// + /// Jpeg component values. + /// Red colors lane. + /// Green colors lane. + /// Blue colors lane. + protected abstract void ConvertFromRgbVectorized(in ComponentValues values, Span rLane, Span gLane, Span bLane); + + /// + /// Converts remainder of RGB lanes to jpeg component values after + /// conversion in . + /// + /// Jpeg component values. + /// Red colors lane. + /// Green colors lane. + /// Blue colors lane. + protected abstract void ConvertFromRgbScalarRemainder(in ComponentValues values, Span rLane, Span gLane, Span bLane); + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrColorConversion.cs index eb9224fee9..897ef9375d 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrColorConversion.cs @@ -17,7 +17,7 @@ public class YCbCrColorConversion : ColorConversionBenchmark [Benchmark] public void Scalar() { - var values = new JpegColorConverterBase.ComponentValues(this.Input, 0); + JpegColorConverterBase.ComponentValues values = new(this.Input, 0); new JpegColorConverterBase.YCbCrScalar(8).ConvertToRgbInPlace(values); } @@ -25,8 +25,32 @@ public class YCbCrColorConversion : ColorConversionBenchmark [Benchmark] public void SimdVector8() { - var values = new JpegColorConverterBase.ComponentValues(this.Input, 0); + JpegColorConverterBase.ComponentValues values = new(this.Input, 0); new 
JpegColorConverterBase.YCbCrVector(8).ConvertToRgbInPlace(values); } + + [Benchmark] + public void SimdVector128() + { + JpegColorConverterBase.ComponentValues values = new(this.Input, 0); + + new JpegColorConverterBase.YCbCrVector128(8).ConvertToRgbInPlace(values); + } + + [Benchmark] + public void SimdVector256() + { + JpegColorConverterBase.ComponentValues values = new(this.Input, 0); + + new JpegColorConverterBase.YCbCrVector256(8).ConvertToRgbInPlace(values); + } + + [Benchmark] + public void SimdVector512() + { + JpegColorConverterBase.ComponentValues values = new(this.Input, 0); + + new JpegColorConverterBase.YCbCrVector512(8).ConvertToRgbInPlace(values); + } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 02a5d89079..69faa0bf41 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; using SixLabors.ImageSharp.ColorProfiles; @@ -180,9 +181,17 @@ public class JpegColorConverterTests { // arrange Type expectedType = typeof(JpegColorConverterBase.YCbCrScalar); - if (Avx.IsSupported) + if (JpegColorConverterBase.JpegColorConverterVector512.IsSupported) { - expectedType = typeof(JpegColorConverterBase.YCbCrVector); + expectedType = typeof(JpegColorConverterBase.YCbCrVector512); + } + else if (JpegColorConverterBase.JpegColorConverterVector256.IsSupported) + { + expectedType = typeof(JpegColorConverterBase.YCbCrVector256); + } + else if (JpegColorConverterBase.JpegColorConverterVector128.IsSupported) + { + expectedType = typeof(JpegColorConverterBase.YCbCrVector128); } else if (Sse2.IsSupported) {