diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 0320229a2b..dc41e179e7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -83,7 +83,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int x = 0; x < pixels.Width; x += 8) { - pixelConverter.Convert(frame, x, y, ref currentRows); + pixelConverter.Convert444(frame, x, y, ref currentRows); prevDCY = this.WriteBlock( QuantIndex.Luminance, @@ -123,9 +123,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Encode420(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - Block8x8F b = default; - Span cb = stackalloc Block8x8F[4]; - Span cr = stackalloc Block8x8F[4]; + Span temporalBlocks = stackalloc Block8x8F[2]; var unzig = ZigZag.CreateUnzigTable(); @@ -148,32 +146,29 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int yOff = (i & 2) * 4; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert(frame, x + xOff, y + yOff, ref currentRows); - - cb[i] = pixelConverter.Cb; - cr[i] = pixelConverter.Cr; + pixelConverter.Convert420(frame, x + xOff, y + yOff, ref currentRows, ref temporalBlocks[0], i); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref pixelConverter.Y, + ref temporalBlocks[0], ref luminanceQuantTable, ref unzig); } - Block8x8F.Scale16X16To8X8(ref b, cb); + pixelConverter.ConvertCbCr(ref temporalBlocks[0], ref temporalBlocks[1]); + prevDCCb = this.WriteBlock( QuantIndex.Chrominance, prevDCCb, - ref b, + ref temporalBlocks[0], ref chrominanceQuantTable, ref unzig); - Block8x8F.Scale16X16To8X8(ref b, cr); prevDCCr = this.WriteBlock( QuantIndex.Chrominance, prevDCCr, - ref b, + ref temporalBlocks[1], ref chrominanceQuantTable, ref unzig); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 62e82243cb..9760e9e93c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -42,7 +42,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder #endif /// - /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices + /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices with 4:4:4 subsampling /// /// Total size of rgb span must be 200 bytes /// Span of rgb pixels with size of 64 @@ -120,5 +120,144 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } #endif } + + /// + /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices with 4:2:0 subsampling + /// + /// Total size of rgb span must be 200 bytes + /// Span of rgb pixels with size of 64 + /// 8x8 destination matrix of Luminance(Y) converted data + /// + /// + /// + /// + public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F rAcc, ref Block8x8F gAcc, ref Block8x8F bAcc, int idx) + { + Debug.Assert(IsSupported, "AVX2 is required to run this converter"); + +#if SUPPORTS_RUNTIME_INTRINSICS + var f0299 = Vector256.Create(0.299f); + var f0587 = Vector256.Create(0.587f); + var f0114 = Vector256.Create(0.114f); + var fn0168736 = Vector256.Create(-0.168736f); + var fn0331264 = Vector256.Create(-0.331264f); + var f128 = Vector256.Create(128f); + var fn0418688 = Vector256.Create(-0.418688f); + var fn0081312F = Vector256.Create(-0.081312F); + var f05 = Vector256.Create(0.5f); + var zero = Vector256.Create(0).AsByte(); + + ref Vector256 rgbByteSpan = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan)); + ref Vector256 destYRef = ref yBlock.V0; + + int destOffset = (idx & 2) * 4 + (idx & 1); + + ref Vector128 destRedRef = ref Unsafe.Add(ref Unsafe.As>(ref rAcc), destOffset); + ref Vector128 destGreenRef = ref Unsafe.Add(ref Unsafe.As>(ref gAcc), destOffset); + ref Vector128 destBlueRef = ref Unsafe.Add(ref Unsafe.As>(ref bAcc), destOffset); + + var extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes)); + var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb)); + Vector256 rgb, rg, bx; + Vector256 r, g, b; + + Span> rDataLanes = stackalloc Vector256[4]; + Span> gDataLanes = stackalloc Vector256[4]; + Span> bDataLanes = stackalloc Vector256[4]; + + const int bytesPerRgbStride = 24; + for (int i = 0; i < 2; i++) + { + // each 4 lanes - [0, 1, 2, 3] & [4, 5, 6, 7] + for (int j = 0; j < 4; j++) + { + rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); + + rgb = Avx2.Shuffle(rgb, extractRgbMask); + + rg = Avx2.UnpackLow(rgb, zero); + bx = Avx2.UnpackHigh(rgb, zero); + + r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); + g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); + b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); + + // (0.299F * r) + (0.587F * g) + (0.114F * b); + Unsafe.Add(ref destYRef, i * 4 + j) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); + + rDataLanes[j] = r; + gDataLanes[j] = g; + bDataLanes[j] = b; + } + + int localDestOffset = (i & 1) * 4; + + // red + Vector256 twoLane = Scale_8x4_4x2(rDataLanes); + Unsafe.Add(ref destRedRef, localDestOffset) = twoLane.GetLower(); + Unsafe.Add(ref destRedRef, localDestOffset + 2) = twoLane.GetUpper(); + + // green + twoLane = Scale_8x4_4x2(gDataLanes); + Unsafe.Add(ref destGreenRef, localDestOffset) = twoLane.GetLower(); + Unsafe.Add(ref destGreenRef, localDestOffset + 2) = twoLane.GetUpper(); + + // blue + twoLane = Scale_8x4_4x2(bDataLanes); + Unsafe.Add(ref destBlueRef, localDestOffset) = twoLane.GetLower(); + Unsafe.Add(ref destBlueRef, localDestOffset + 2) = twoLane.GetUpper(); + } +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Scale_8x4_4x2(Span> v) + { + Vector256 switchInnerDoubleWords = Unsafe.As>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32)); + var f025 = Vector256.Create(0.25f); + + Vector256 topPairSum = SumHorizontalPairs(v[0], v[1]); + Vector256 botPairSum = SumHorizontalPairs(v[2], v[3]); + + return Avx2.PermuteVar8x32(Avx.Multiply(SumVerticalPairs(topPairSum, botPairSum), f025), switchInnerDoubleWords); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SumHorizontalPairs(Vector256 v0, Vector256 v1) + => Avx.Add(Avx.Shuffle(v0, v1, 0b10_00_10_00), Avx.Shuffle(v0, v1, 0b11_01_11_01)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SumVerticalPairs(Vector256 v0, Vector256 v1) + => Avx.Add(Avx.Shuffle(v0, v1, 0b01_00_01_00), Avx.Shuffle(v0, v1, 0b11_10_11_10)); + + public static void ConvertCbCr(ref Block8x8F rBlock, ref Block8x8F gBlock, ref Block8x8F bBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) + { + var fn0168736 = Vector256.Create(-0.168736f); + var fn0331264 = Vector256.Create(-0.331264f); + var f128 = Vector256.Create(128f); + var fn0418688 = Vector256.Create(-0.418688f); + var fn0081312F = Vector256.Create(-0.081312F); + var f05 = Vector256.Create(0.5f); + + ref Vector256 destCbRef = ref cbBlock.V0; + ref Vector256 destCrRef = ref crBlock.V0; + + ref Vector256 rRef = ref rBlock.V0; + ref Vector256 gRef = ref gBlock.V0; + ref Vector256 bRef = ref bBlock.V0; + + for (int i = 0; i < 8; i++) + { + ref Vector256 r = ref Unsafe.Add(ref rRef, i); + ref Vector256 g = ref Unsafe.Add(ref gRef, i); + ref Vector256 b = ref Unsafe.Add(ref bRef, i); + + // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) + Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); + + // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) + Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); + } + } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index ee4626b86a..7bf7b8547e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -84,5 +84,41 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } + + /// + /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) + /// + public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, ref Block8x8F yBlock, int idx) + { + this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); + + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); + + ref Block8x8F rSub = ref this.Y; + ref Block8x8F gSub = ref this.Cb; + ref Block8x8F bSub = ref this.Cr; + + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref yBlock, ref rSub, ref gSub, ref bSub, idx); + } + else + { + throw new NotSupportedException("This is not yet implemented"); + //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + } + } + + public void ConvertCbCr(ref Block8x8F cb, ref Block8x8F cr) + { + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.ConvertCbCr(ref this.Y, ref this.Cb, ref this.Cr, ref cb, ref cr); + } + else + { + throw new NotSupportedException("This is not yet implemented"); + } + } } }