From 953095f1b981a59372bfc7b7c7c94ce8d4d68002 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 18:52:03 +0300 Subject: [PATCH] 420 converter fixes --- .../Components/Encoder/HuffmanScanEncoder.cs | 10 +++--- .../Encoder/RgbToYCbCrConverterVectorized.cs | 35 +++++++++++++++++-- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 4fbd9e4ecb..3231c5781e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -125,7 +125,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { var unzig = ZigZag.CreateUnzigTable(); - var pixelConverter = YCbCrForwardConverter444.Create(); + var pixelConverter = YCbCrForwardConverter420.Create(); // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; @@ -138,23 +138,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder cancellationToken.ThrowIfCancellationRequested(); for (int x = 0; x < pixels.Width; x += 16) { - for(int i = 0; i < 2; i++) + for (int i = 0; i < 2; i++) { int yOff = i * 8; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert420(frame, x, y, ref currentRows, i); + pixelConverter.Convert(frame, x, y, ref currentRows, i); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref pixelConverter.twinBlocksY[0], + ref pixelConverter.YLeft, ref luminanceQuantTable, ref unzig); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref pixelConverter.twinBlocksY[1], + ref pixelConverter.YRight, ref luminanceQuantTable, ref unzig); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index e5fe4dea2f..cf4d477749 100644 --- 
a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -28,6 +28,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } #if SUPPORTS_RUNTIME_INTRINSICS + // TODO: documentation + public const int AvxRegisterRgbCompatibilityOffset = 8; + private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, @@ -205,7 +208,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// <summary> /// Converts 16x8 Rgb24 pixels matrix to 2 Y 8x8 matrices with 4:2:0 subsampling /// </summary> - public static void Convert420_16x8(ReadOnlySpan<Rgb24> rgbSpan, Span<Block8x8F> yBlocks, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) + public static void Convert420_16x8(ReadOnlySpan<Rgb24> rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -241,7 +244,33 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int i = 0; i < 4; i++) { // 16x2 => 8x1 - for (int j = 0; j < 4; j++) + // left 8x8 column conversions + for (int j = 0; j < 4; j += 2) + { + rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); + + rgb = Avx2.Shuffle(rgb, extractRgbMask); + + rg = Avx2.UnpackLow(rgb, zero); + bx = Avx2.UnpackHigh(rgb, zero); + + r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); + g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); + b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); + + int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1); + + // (0.299F * r) + (0.587F * g) + (0.114F * b); + Unsafe.Add(ref yBlockLeft.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, 
b), f0587, g), f0299, r); + + rDataLanes[j] = r; + gDataLanes[j] = g; + bDataLanes[j] = b; + } + + // 16x2 => 8x1 + // right 8x8 column conversions + for (int j = 1; j < 4; j += 2) { rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); @@ -257,7 +286,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1); // (0.299F * r) + (0.587F * g) + (0.114F * b); - Unsafe.Add(ref yBlocks[j & 1].V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); + Unsafe.Add(ref yBlockRight.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); rDataLanes[j] = r; gDataLanes[j] = g;