From 20a0d846768bb7662fc19cb6ae88648b5b3a0810 Mon Sep 17 00:00:00 2001
From: Dmitry Pentin
Date: Thu, 10 Jun 2021 05:09:53 +0300
Subject: [PATCH] Moved jpeg matrix scaler to jpeg converter

---
 .../Common/Helpers/SimdUtils.HwIntrinsics.cs  | 19 -------------------
 .../Encoder/RgbToYCbCrConverterVectorized.cs  | 32 +++++++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
index caeb694a99..b530a37e77 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -577,25 +577,6 @@ namespace SixLabors.ImageSharp
                 }
             }
 
-            /// <summary>
-            /// Scales 8x8 matrix to 4x2 using 2x2 average
-            /// </summary>
-            /// <param name="v">Input matrix consisting of 4 256bit vectors, first row: (v[0], v[2]), second row: (v[1], v[3])</param>
-            /// <returns>256bit vector containing upper and lower scaled parts of the input matrix</returns>
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector256<float> Scale16x2_8x1(ReadOnlySpan<Vector256<float>> v)
-            {
-                DebugGuard.IsTrue(v.Length == 4, "Input span must consist of 4 elements");
-
-                var f025 = Vector256.Create(0.25f);
-
-                Vector256<float> left = Avx.Add(v[0], v[2]);
-                Vector256<float> right = Avx.Add(v[1], v[3]);
-                Vector256<float> avg2x2 = Avx.Multiply(Avx.HorizontalAdd(left, right), f025);
-
-                return Avx2.Permute4x64(avg2x2.AsDouble(), 0b11_01_10_00).AsSingle();
-            }
-
             /// <summary>
             /// as many elements as possible, slicing them down (keeping the remainder).
             /// </summary>
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
index 56da8acc71..1b7df596c9 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
@@ -221,9 +221,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
                     bDataLanes[j] = b;
                 }
 
-                r = SimdUtils.HwIntrinsics.Scale16x2_8x1(rDataLanes);
-                g = SimdUtils.HwIntrinsics.Scale16x2_8x1(gDataLanes);
-                b = SimdUtils.HwIntrinsics.Scale16x2_8x1(bDataLanes);
+                r = Scale16x2_8x1(rDataLanes);
+                g = Scale16x2_8x1(gDataLanes);
+                b = Scale16x2_8x1(bDataLanes);
 
                 // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
                 Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r));
@@ -233,5 +233,31 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
             }
 #endif
         }
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+        /// <summary>
+        /// Scales 16x2 matrix to 8x1 using 2x2 average
+        /// </summary>
+        /// <param name="v">Input matrix consisting of 4 256bit vectors; v[0] is added to v[2] and v[1] to v[3]</param>
+        /// <returns>256bit vector containing upper and lower scaled parts of the input matrix</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<float> Scale16x2_8x1(ReadOnlySpan<Vector256<float>> v)
+        {
+            DebugGuard.IsTrue(v.Length == 4, "Input span must consist of 4 elements");
+
+            var f025 = Vector256.Create(0.25f);
+
+            // Vertical pass: add v[0] to v[2] and v[1] to v[3] element-wise.
+            Vector256<float> left = Avx.Add(v[0], v[2]);
+            Vector256<float> right = Avx.Add(v[1], v[3]);
+
+            // Horizontal pass: HorizontalAdd sums adjacent pairs within each 128bit lane,
+            // giving (L, L, R, R | L, L, R, R); multiplying by 0.25 turns the sums into averages.
+            Vector256<float> avg2x2 = Avx.Multiply(Avx.HorizontalAdd(left, right), f025);
+
+            // Swap the two middle 64bit blocks so the result reads (L, L, L, L, R, R, R, R).
+            return Avx2.Permute4x64(avg2x2.AsDouble(), 0b11_01_10_00).AsSingle();
+        }
+#endif
     }
 }