From 6ac2b6660bf015ee95637c7af948bbffa18a1c4f Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 14:21:49 +0300 Subject: [PATCH 01/23] Added comments to vectorized rgb->ycbcr converter for further code changes --- .../Encoder/RgbToYCbCrConverterVectorized.cs | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 3ee1ca9890..a6ff21bdc9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -47,6 +47,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder }; #endif + /// + /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices + /// + /// Total size of rgb span must be 200 bytes + /// Span of rgb pixels with size of 64 + /// 8x8 destination matrix of Luminance(Y) converted data + /// 8x8 destination matrix of Chrominance(Cb) converted data + /// 8x8 destination matrix of Chrominance(Cr) converted data public static void Convert(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -63,7 +71,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder var f05 = Vector256.Create(0.5f); var zero = Vector256.Create(0).AsByte(); - ref Vector256 inRef = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan)); + ref Vector256 rgbByteSpan = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan)); ref Vector256 destYRef = ref yBlock.V0; ref Vector256 destCbRef = ref cbBlock.V0; ref Vector256 destCrRef = ref crBlock.V0; @@ -72,9 +80,31 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb)); Vector256 rgb, rg, bx; Vector256 r, g, b; + + // TODO: probably remove this after the draft + // rgbByteSpan contains 8 strides by 8 pixels each, thus 64 pixels total + // Strides are stored sequentially - one big span of 64 * 3 = 192 bytes + // Each stride has exactly 3 * 8 = 24 bytes or 3 * 8 * 8 = 192 bits + // Avx registers are 256 bits so rgb span will be loaded with extra 64 bits from the next stride: + // stride 0 0 - 192 -(+64bits)-> 256 + // stride 1 192 - 384 -(+64bits)-> 448 + // stride 2 384 - 576 -(+64bits)-> 640 + // stride 3 576 - 768 -(+64bits)-> 832 + // stride 4 768 - 960 -(+64bits)-> 1024 + // stride 5 960 - 1152 -(+64bits)-> 1216 + // stride 6 1152 - 1344 -(+64bits)-> 1408 + // stride 7 1344 - 1536 -(+64bits)-> 1600 <-- READ ACCESS VIOLATION + // + // Total size of the 64 pixel rgb span: 64 * 3 * 8 = 1536 bits, avx operations require 1600 bits + // This is not permitted - we are reading foreign memory + // That's why last stride is calculated outside of the for-loop loop with special extract shuffle mask involved + // + // Extra mask & separate stride:7 calculations can be eliminated by simply providing rgb pixel span of slightly bigger size than pixels data need: + // Total pixel data size is 192 bytes, avx registers need it to be 200 bytes + const int bytesPerRgbStride = 24; for (int i = 0; i < 7; i++) { - rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)(24 * i)).AsUInt32(), extractToLanesMask).AsByte(); + rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte(); rgb = Avx2.Shuffle(rgb, extractRgbMask); @@ -96,7 +126,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveLast24BytesToSeparateLanes)); - rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte(); + rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte(); rgb = Avx2.Shuffle(rgb, extractRgbMask); rg = Avx2.UnpackLow(rgb, zero); From a845c00f6f5698dc2ba5e11a39791d49bc443eb6 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 14:47:06 +0300 Subject: [PATCH 02/23] Simplified RgbToYCbCrConverterVectorized.Convert() method --- .../Encoder/RgbToYCbCrConverterVectorized.cs | 28 +------------------ .../Encoder/YCbCrForwardConverter{TPixel}.cs | 17 +++++++---- 2 files changed, 12 insertions(+), 33 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index a6ff21bdc9..62e82243cb 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -34,12 +34,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; - private static ReadOnlySpan MoveLast24BytesToSeparateLanes => new byte[] - { - 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, - 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0 - }; - private static ReadOnlySpan ExtractRgb => new byte[] { 0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF, @@ -102,7 +96,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // Extra mask & separate stride:7 calculations can be eliminated by simply providing rgb pixel span of slightly bigger size than pixels data need: // Total pixel data size is 192 bytes, avx registers need it to be 200 bytes const int bytesPerRgbStride = 24; - for (int i = 0; i < 7; i++) + for (int i = 0; i < 8; i++) { rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte(); @@ -124,26 +118,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); } - - extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveLast24BytesToSeparateLanes)); - rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte(); - rgb = Avx2.Shuffle(rgb, extractRgbMask); - - rg = Avx2.UnpackLow(rgb, zero); - bx = Avx2.UnpackHigh(rgb, zero); - - r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); - g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); - b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); - - // (0.299F * r) + (0.587F * g) + (0.114F * b); - Unsafe.Add(ref destYRef, 7) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); - - // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) - Unsafe.Add(ref destCbRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); - - // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) - Unsafe.Add(ref destCrRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); #endif } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 81e64b277b..ee4626b86a 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; @@ -42,14 +43,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Temporal RGB block /// - private GenericBlock8x8 rgbBlock; + private Span rgbSpan; public static YCbCrForwardConverter Create() { var result = default(YCbCrForwardConverter); + + // creating rgb pixel bufferr + // TODO: this is subject to discuss + result.rgbSpan = MemoryMarshal.Cast(new byte[200].AsSpan()); + + // Avoid creating lookup tables, when vectorized converter is supported if (!RgbToYCbCrConverterVectorized.IsSupported) { - // Avoid creating lookup tables, when vectorized converter is supported result.colorTables = RgbToYCbCrConverterLut.Create(); } @@ -63,8 +69,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); - Span rgbSpan = this.rgbBlock.AsSpanUnsafe(); - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), rgbSpan); + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); ref Block8x8F yBlock = ref this.Y; ref Block8x8F cbBlock = ref this.Cb; @@ -72,11 +77,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder if (RgbToYCbCrConverterVectorized.IsSupported) { - RgbToYCbCrConverterVectorized.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + RgbToYCbCrConverterVectorized.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } else { - this.colorTables.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } } From 2ad3ddb0364784916b95bce180618da7b279783b Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 19:31:39 +0300 Subject: [PATCH 03/23] [WIP] Introduced RgbToYCbCrConverterVectorized 420 sampling --- .../Components/Encoder/HuffmanScanEncoder.cs | 21 +-- .../Encoder/RgbToYCbCrConverterVectorized.cs | 141 +++++++++++++++++- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 36 +++++ 3 files changed, 184 insertions(+), 14 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 0320229a2b..dc41e179e7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -83,7 +83,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int x = 0; x < pixels.Width; x += 8) { - pixelConverter.Convert(frame, x, y, ref currentRows); + pixelConverter.Convert444(frame, x, y, ref currentRows); prevDCY = this.WriteBlock( QuantIndex.Luminance, @@ -123,9 +123,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Encode420(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - Block8x8F b = default; - Span cb = stackalloc Block8x8F[4]; - Span cr = stackalloc Block8x8F[4]; + Span temporalBlocks = stackalloc Block8x8F[2]; var unzig = ZigZag.CreateUnzigTable(); @@ -148,32 +146,29 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int yOff = (i & 2) * 4; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert(frame, x + xOff, y + yOff, ref currentRows); - - cb[i] = pixelConverter.Cb; - cr[i] = pixelConverter.Cr; + pixelConverter.Convert420(frame, x + xOff, y + yOff, ref currentRows, ref temporalBlocks[0], i); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref pixelConverter.Y, + ref temporalBlocks[0], ref luminanceQuantTable, ref unzig); } - Block8x8F.Scale16X16To8X8(ref b, cb); + pixelConverter.ConvertCbCr(ref temporalBlocks[0], ref temporalBlocks[1]); + prevDCCb = this.WriteBlock( QuantIndex.Chrominance, prevDCCb, - ref b, + ref temporalBlocks[0], ref chrominanceQuantTable, ref unzig); - Block8x8F.Scale16X16To8X8(ref b, cr); prevDCCr = this.WriteBlock( QuantIndex.Chrominance, prevDCCr, - ref b, + ref temporalBlocks[1], ref chrominanceQuantTable, ref unzig); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 62e82243cb..9760e9e93c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -42,7 +42,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder #endif /// - /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices + /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices with 4:4:4 subsampling /// /// Total size of rgb span must be 200 bytes /// Span of rgb pixels with size of 64 @@ -120,5 +120,144 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } #endif } + + /// + /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices with 4:2:0 subsampling + /// + /// Total size of rgb span must be 200 bytes + /// Span of rgb pixels with size of 64 + /// 8x8 destination matrix of Luminance(Y) converted data + /// + /// + /// + /// + public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F rAcc, ref Block8x8F gAcc, ref Block8x8F bAcc, int idx) + { + Debug.Assert(IsSupported, "AVX2 is required to run this converter"); + +#if SUPPORTS_RUNTIME_INTRINSICS + var f0299 = Vector256.Create(0.299f); + var f0587 = Vector256.Create(0.587f); + var f0114 = Vector256.Create(0.114f); + var fn0168736 = Vector256.Create(-0.168736f); + var fn0331264 = Vector256.Create(-0.331264f); + var f128 = Vector256.Create(128f); + var fn0418688 = Vector256.Create(-0.418688f); + var fn0081312F = Vector256.Create(-0.081312F); + var f05 = Vector256.Create(0.5f); + var zero = Vector256.Create(0).AsByte(); + + ref Vector256 rgbByteSpan = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan)); + ref Vector256 destYRef = ref yBlock.V0; + + int destOffset = (idx & 2) * 4 + (idx & 1); + + ref Vector128 destRedRef = ref Unsafe.Add(ref Unsafe.As>(ref rAcc), destOffset); + ref Vector128 destGreenRef = ref Unsafe.Add(ref Unsafe.As>(ref gAcc), destOffset); + ref Vector128 destBlueRef = ref Unsafe.Add(ref Unsafe.As>(ref bAcc), destOffset); + + var extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes)); + var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb)); + Vector256 rgb, rg, bx; + Vector256 r, g, b; + + Span> rDataLanes = stackalloc Vector256[4]; + Span> gDataLanes = stackalloc Vector256[4]; + Span> bDataLanes = stackalloc Vector256[4]; + + const int bytesPerRgbStride = 24; + for (int i = 0; i < 2; i++) + { + // each 4 lanes - [0, 1, 2, 3] & [4, 5, 6, 7] + for (int j = 0; j < 4; j++) + { + rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); + + rgb = Avx2.Shuffle(rgb, extractRgbMask); + + rg = Avx2.UnpackLow(rgb, zero); + bx = Avx2.UnpackHigh(rgb, zero); + + r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); + g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); + b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); + + // (0.299F * r) + (0.587F * g) + (0.114F * b); + Unsafe.Add(ref destYRef, i * 4 + j) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); + + rDataLanes[j] = r; + gDataLanes[j] = g; + bDataLanes[j] = b; + } + + int localDestOffset = (i & 1) * 4; + + // red + Vector256 twoLane = Scale_8x4_4x2(rDataLanes); + Unsafe.Add(ref destRedRef, localDestOffset) = twoLane.GetLower(); + Unsafe.Add(ref destRedRef, localDestOffset + 2) = twoLane.GetUpper(); + + // green + twoLane = Scale_8x4_4x2(gDataLanes); + Unsafe.Add(ref destGreenRef, localDestOffset) = twoLane.GetLower(); + Unsafe.Add(ref destGreenRef, localDestOffset + 2) = twoLane.GetUpper(); + + // blue + twoLane = Scale_8x4_4x2(bDataLanes); + Unsafe.Add(ref destBlueRef, localDestOffset) = twoLane.GetLower(); + Unsafe.Add(ref destBlueRef, localDestOffset + 2) = twoLane.GetUpper(); + } +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Scale_8x4_4x2(Span> v) + { + Vector256 switchInnerDoubleWords = Unsafe.As>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32)); + var f025 = Vector256.Create(0.25f); + + Vector256 topPairSum = SumHorizontalPairs(v[0], v[1]); + Vector256 botPairSum = SumHorizontalPairs(v[2], v[3]); + + return Avx2.PermuteVar8x32(Avx.Multiply(SumVerticalPairs(topPairSum, botPairSum), f025), switchInnerDoubleWords); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SumHorizontalPairs(Vector256 v0, Vector256 v1) + => Avx.Add(Avx.Shuffle(v0, v1, 0b10_00_10_00), Avx.Shuffle(v0, v1, 0b11_01_11_01)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SumVerticalPairs(Vector256 v0, Vector256 v1) + => Avx.Add(Avx.Shuffle(v0, v1, 0b01_00_01_00), Avx.Shuffle(v0, v1, 0b11_10_11_10)); + + public static void ConvertCbCr(ref Block8x8F rBlock, ref Block8x8F gBlock, ref Block8x8F bBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) + { + var fn0168736 = Vector256.Create(-0.168736f); + var fn0331264 = Vector256.Create(-0.331264f); + var f128 = Vector256.Create(128f); + var fn0418688 = Vector256.Create(-0.418688f); + var fn0081312F = Vector256.Create(-0.081312F); + var f05 = Vector256.Create(0.5f); + + ref Vector256 destCbRef = ref cbBlock.V0; + ref Vector256 destCrRef = ref crBlock.V0; + + ref Vector256 rRef = ref rBlock.V0; + ref Vector256 gRef = ref gBlock.V0; + ref Vector256 bRef = ref bBlock.V0; + + for (int i = 0; i < 8; i++) + { + ref Vector256 r = ref Unsafe.Add(ref rRef, i); + ref Vector256 g = ref Unsafe.Add(ref gRef, i); + ref Vector256 b = ref Unsafe.Add(ref bRef, i); + + // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) + Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); + + // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) + Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); + } + } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index ee4626b86a..7bf7b8547e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -84,5 +84,41 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } + + /// + /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) + /// + public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, ref Block8x8F yBlock, int idx) + { + this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); + + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); + + ref Block8x8F rSub = ref this.Y; + ref Block8x8F gSub = ref this.Cb; + ref Block8x8F bSub = ref this.Cr; + + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref yBlock, ref rSub, ref gSub, ref bSub, idx); + } + else + { + throw new NotSupportedException("This is not yet implemented"); + //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + } + } + + public void ConvertCbCr(ref Block8x8F cb, ref Block8x8F cr) + { + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.ConvertCbCr(ref this.Y, ref this.Cb, ref this.Cr, ref cb, ref cr); + } + else + { + throw new NotSupportedException("This is not yet implemented"); + } + } } } From 201c5341e69fbedcbe5bc619edb81ee85419321f Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 19:40:13 +0300 Subject: [PATCH 04/23] Fixed HuffmanScanEncoder error --- .../Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index dc41e179e7..3d99a1b95a 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -83,7 +83,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int x = 0; x < pixels.Width; x += 8) { - pixelConverter.Convert444(frame, x, y, ref currentRows); + pixelConverter.Convert(frame, x, y, ref currentRows); prevDCY = this.WriteBlock( QuantIndex.Luminance, From 8a7749644ab7b1170fc86194b400007885144678 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 21:19:36 +0300 Subject: [PATCH 05/23] Imporved internal rgb -> rcbcr conversion api for 420 subsampling --- .../Components/Encoder/HuffmanScanEncoder.cs | 10 +++--- .../Encoder/RgbToYCbCrConverterVectorized.cs | 36 ++++++++----------- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 20 ++--------- 3 files changed, 21 insertions(+), 45 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 3d99a1b95a..ff5ce957e0 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -146,29 +146,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int yOff = (i & 2) * 4; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert420(frame, x + xOff, y + yOff, ref currentRows, ref temporalBlocks[0], i); + pixelConverter.Convert420(frame, x + xOff, y + yOff, ref currentRows, i); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref temporalBlocks[0], + ref pixelConverter.Y, ref luminanceQuantTable, ref unzig); } - pixelConverter.ConvertCbCr(ref temporalBlocks[0], ref temporalBlocks[1]); - prevDCCb = this.WriteBlock( QuantIndex.Chrominance, prevDCCb, - ref temporalBlocks[0], + ref pixelConverter.Cb, ref chrominanceQuantTable, ref unzig); prevDCCr = this.WriteBlock( QuantIndex.Chrominance, prevDCCr, - ref temporalBlocks[1], + ref pixelConverter.Cr, ref chrominanceQuantTable, ref unzig); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 9760e9e93c..055c7176a7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -126,12 +126,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Total size of rgb span must be 200 bytes /// Span of rgb pixels with size of 64 - /// 8x8 destination matrix of Luminance(Y) converted data - /// - /// - /// - /// - public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F rAcc, ref Block8x8F gAcc, ref Block8x8F bAcc, int idx) + /// 8x8 destination matrix of Luminance(Y) converted dataф + public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock, int idx) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -152,9 +148,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int destOffset = (idx & 2) * 4 + (idx & 1); - ref Vector128 destRedRef = ref Unsafe.Add(ref Unsafe.As>(ref rAcc), destOffset); - ref Vector128 destGreenRef = ref Unsafe.Add(ref Unsafe.As>(ref gAcc), destOffset); - ref Vector128 destBlueRef = ref Unsafe.Add(ref Unsafe.As>(ref bAcc), destOffset); + ref Vector128 destCbRef = ref Unsafe.Add(ref Unsafe.As>(ref cbBlock), destOffset); + ref Vector128 destCrRef = ref Unsafe.Add(ref Unsafe.As>(ref crBlock), destOffset); var extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes)); var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb)); @@ -192,20 +187,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int localDestOffset = (i & 1) * 4; - // red - Vector256 twoLane = Scale_8x4_4x2(rDataLanes); - Unsafe.Add(ref destRedRef, localDestOffset) = twoLane.GetLower(); - Unsafe.Add(ref destRedRef, localDestOffset + 2) = twoLane.GetUpper(); + r = Scale_8x4_4x2(rDataLanes); + g = Scale_8x4_4x2(gDataLanes); + b = Scale_8x4_4x2(bDataLanes); - // green - twoLane = Scale_8x4_4x2(gDataLanes); - Unsafe.Add(ref destGreenRef, localDestOffset) = twoLane.GetLower(); - Unsafe.Add(ref destGreenRef, localDestOffset + 2) = twoLane.GetUpper(); + // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) + Vector256 cb = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); + Unsafe.Add(ref destCbRef, localDestOffset) = cb.GetLower(); + Unsafe.Add(ref destCbRef, localDestOffset + 2) = cb.GetUpper(); - // blue - twoLane = Scale_8x4_4x2(bDataLanes); - Unsafe.Add(ref destBlueRef, localDestOffset) = twoLane.GetLower(); - Unsafe.Add(ref destBlueRef, localDestOffset + 2) = twoLane.GetUpper(); + // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) + Vector256 cr = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); + Unsafe.Add(ref destCrRef, localDestOffset) = cr.GetLower(); + Unsafe.Add(ref destCrRef, localDestOffset + 2) = cr.GetUpper(); } #endif } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 7bf7b8547e..c835e8df81 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -88,19 +88,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) /// - public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, ref Block8x8F yBlock, int idx) + public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) { this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); - ref Block8x8F rSub = ref this.Y; - ref Block8x8F gSub = ref this.Cb; - ref Block8x8F bSub = ref this.Cr; - if (RgbToYCbCrConverterVectorized.IsSupported) { - RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref yBlock, ref rSub, ref gSub, ref bSub, idx); + RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref this.Y, ref this.Cb, ref this.Cr, idx); } else { @@ -108,17 +104,5 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } - - public void ConvertCbCr(ref Block8x8F cb, ref Block8x8F cr) - { - if (RgbToYCbCrConverterVectorized.IsSupported) - { - RgbToYCbCrConverterVectorized.ConvertCbCr(ref this.Y, ref this.Cb, ref this.Cr, ref cb, ref cr); - } - else - { - throw new NotSupportedException("This is not yet implemented"); - } - } } } From 052ebde3ad4abd3a68d9648a66fc4ae9be37df82 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 22:31:17 +0300 Subject: [PATCH 06/23] Replaced GenericBlocl8x8 with Span in ycbcr converter --- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 65 +++++++++++++++++-- 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index c835e8df81..952dde1112 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; @@ -38,7 +39,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Temporal 8x8 block to hold TPixel data /// - private GenericBlock8x8 pixelBlock; + private Span pixelSpan; /// /// Temporal RGB block @@ -52,6 +53,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // creating rgb pixel bufferr // TODO: this is subject to discuss result.rgbSpan = MemoryMarshal.Cast(new byte[200].AsSpan()); + result.pixelSpan = new TPixel[64].AsSpan(); // Avoid creating lookup tables, when vectorized converter is supported if (!RgbToYCbCrConverterVectorized.IsSupported) @@ -67,9 +69,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) { - this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); + Memory.Buffer2D buffer = frame.PixelBuffer; + LoadAndStretchEdges(currentRows, this.pixelSpan, x, buffer.Width - x, buffer.Height - y); - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); ref Block8x8F yBlock = ref this.Y; ref Block8x8F cbBlock = ref this.Cb; @@ -90,9 +93,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) { - this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); + Memory.Buffer2D buffer = frame.PixelBuffer; + LoadAndStretchEdges(currentRows, this.pixelSpan, x, Math.Min(8, buffer.Width - x), Math.Min(8, buffer.Height - y)); - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); if (RgbToYCbCrConverterVectorized.IsSupported) { @@ -104,5 +108,56 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } + + // TODO: add DebugGuard checks? + private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int width, int height) + { + //Guard.MustBeBetweenOrEqualTo(width, 1, 8, nameof(width)); + //Guard.MustBeBetweenOrEqualTo(height, 1, 8, nameof(width)); + + // TODO: this is a strange check, most likely it was introduces due to 2x 8x8 blocks subsampling, should be gone after new 4:2:0 implementation + if (width <= 0 || height <= 0) + { + return; + } + + uint byteWidth = (uint)(width * Unsafe.SizeOf()); + int remainderXCount = 8 - width; + + ref byte blockStart = ref MemoryMarshal.GetReference(MemoryMarshal.Cast(dest)); + int rowSizeInBytes = 8 * Unsafe.SizeOf(); + + for (int y = 0; y < height; y++) + { + Span row = source[y]; + + ref byte s = ref Unsafe.As(ref row[startX]); + ref byte d = ref Unsafe.Add(ref blockStart, y * rowSizeInBytes); + + Unsafe.CopyBlock(ref d, ref s, byteWidth); + + ref TPixel last = ref Unsafe.Add(ref Unsafe.As(ref d), width - 1); + + for (int x = 1; x <= remainderXCount; x++) + { + Unsafe.Add(ref last, x) = last; + } + } + + int remainderYCount = 8 - height; + + if (remainderYCount == 0) + { + return; + } + + ref byte lastRowStart = ref Unsafe.Add(ref blockStart, (height - 1) * rowSizeInBytes); + + for (int y = 1; y <= remainderYCount; y++) + { + ref byte remStart = ref Unsafe.Add(ref lastRowStart, rowSizeInBytes * y); + Unsafe.CopyBlock(ref remStart, ref lastRowStart, (uint)rowSizeInBytes); + } + } } } From d50e255c854cd3c3e46238f7588f102ea3298fd7 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 00:13:06 +0300 Subject: [PATCH 07/23] [WIP] Implemented 16x8 420 subsampling convertion --- .../Components/Encoder/HuffmanScanEncoder.cs | 19 ++-- .../Encoder/RgbToYCbCrConverterVectorized.cs | 86 ++++++++++++++++++- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 24 ++++-- 3 files changed, 110 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index ff5ce957e0..f6e55153aa 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -123,8 +123,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Encode420(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - Span temporalBlocks = stackalloc Block8x8F[2]; - var unzig = ZigZag.CreateUnzigTable(); var pixelConverter = YCbCrForwardConverter.Create(); @@ -140,18 +138,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder cancellationToken.ThrowIfCancellationRequested(); for (int x = 0; x < pixels.Width; x += 16) { - for (int i = 0; i < 4; i++) + for(int i = 0; i < 2; i++) { - int xOff = (i & 1) * 8; - int yOff = (i & 2) * 4; - + int yOff = i * 8; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert420(frame, x + xOff, y + yOff, ref currentRows, i); + pixelConverter.Convert420(frame, x, y, ref currentRows, i); + + prevDCY = this.WriteBlock( + QuantIndex.Luminance, + prevDCY, + ref pixelConverter.twinBlocksY[0], + ref luminanceQuantTable, + ref unzig); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref pixelConverter.Y, + ref pixelConverter.twinBlocksY[1], ref luminanceQuantTable, ref unzig); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 055c7176a7..a44b174d89 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -204,14 +204,96 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder #endif } + /// + /// Converts 16x8 Rgb24 pixels matrix to 2 Y 8x8 matrices with 4:2:0 subsampling + /// + /// + /// + /// + /// + /// + /// + public static void Convert420_16x8(ReadOnlySpan rgbSpan, Span yBlocks, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) + { + Debug.Assert(IsSupported, "AVX2 is required to run this converter"); + +#if SUPPORTS_RUNTIME_INTRINSICS + var f0299 = Vector256.Create(0.299f); + var f0587 = Vector256.Create(0.587f); + var f0114 = Vector256.Create(0.114f); + var fn0168736 = Vector256.Create(-0.168736f); + var fn0331264 = Vector256.Create(-0.331264f); + var f128 = Vector256.Create(128f); + var fn0418688 = Vector256.Create(-0.418688f); + var fn0081312F = Vector256.Create(-0.081312F); + var f05 = Vector256.Create(0.5f); + var zero = Vector256.Create(0).AsByte(); + + ref Vector256 rgbByteSpan = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan)); + + int destOffset = row * 4; + + ref Vector256 destCbRef = ref Unsafe.Add(ref Unsafe.As>(ref cbBlock), destOffset); + ref Vector256 destCrRef = ref Unsafe.Add(ref Unsafe.As>(ref crBlock), destOffset); + + var extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes)); + var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb)); + Vector256 rgb, rg, bx; + Vector256 r, g, b; + + Span> rDataLanes = stackalloc Vector256[4]; + Span> gDataLanes = stackalloc Vector256[4]; + Span> bDataLanes = stackalloc Vector256[4]; + + const int bytesPerRgbStride = 24; + for (int i = 0; i < 4; i++) + { + // 16x2 => 8x1 + for (int j = 0; j < 4; j++) + { + rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); + + rgb = Avx2.Shuffle(rgb, extractRgbMask); + + rg = Avx2.UnpackLow(rgb, zero); + bx = Avx2.UnpackHigh(rgb, zero); + + r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); + g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); + b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); + + int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1); + + // (0.299F * r) + (0.587F * g) + (0.114F * b); + Unsafe.Add(ref yBlocks[j & 1].V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); + + rDataLanes[j] = r; + gDataLanes[j] = g; + bDataLanes[j] = b; + } + + r = Scale_8x4_4x2(rDataLanes); + g = Scale_8x4_4x2(gDataLanes); + b = Scale_8x4_4x2(bDataLanes); + + // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) + Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); + + // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) + Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); + } +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Scale_8x4_4x2(Span> v) { Vector256 switchInnerDoubleWords = Unsafe.As>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32)); var f025 = Vector256.Create(0.25f); - Vector256 topPairSum = SumHorizontalPairs(v[0], v[1]); - Vector256 botPairSum = SumHorizontalPairs(v[2], v[3]); + Vector256 topPairSum = SumHorizontalPairs(v[0], v[2]); + Vector256 botPairSum = SumHorizontalPairs(v[1], v[3]); return Avx2.PermuteVar8x32(Avx.Multiply(SumVerticalPairs(topPairSum, botPairSum), f025), switchInnerDoubleWords); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 952dde1112..120b21e107 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -46,14 +46,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private Span rgbSpan; + public Span twinBlocksY; + public static YCbCrForwardConverter Create() { var result = default(YCbCrForwardConverter); // creating rgb pixel bufferr // TODO: this is subject to discuss - result.rgbSpan = MemoryMarshal.Cast(new byte[200].AsSpan()); - result.pixelSpan = new TPixel[64].AsSpan(); + const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding + result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); + // TODO: this size should be configurable + result.pixelSpan = new TPixel[128].AsSpan(); + + result.twinBlocksY = new Block8x8F[2].AsSpan(); // Avoid creating lookup tables, when vectorized converter is supported if (!RgbToYCbCrConverterVectorized.IsSupported) @@ -70,7 +76,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) { Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, buffer.Width - x, buffer.Height - y); + LoadAndStretchEdges(currentRows, this.pixelSpan, x, buffer.Width - x, buffer.Height - y, new Size(8)); PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); @@ -94,13 +100,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) { Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, Math.Min(8, buffer.Width - x), Math.Min(8, buffer.Height - y)); + LoadAndStretchEdges(currentRows, this.pixelSpan, x, Math.Min(16, buffer.Width - x), Math.Min(8, buffer.Height - y), new Size(16, 8)); PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); if (RgbToYCbCrConverterVectorized.IsSupported) { - RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref this.Y, ref this.Cb, ref this.Cr, idx); + RgbToYCbCrConverterVectorized.Convert420_16x8(this.rgbSpan, this.twinBlocksY, ref this.Cb, ref this.Cr, idx); } else { @@ -110,7 +116,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } // TODO: add DebugGuard checks? - private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int width, int height) + private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int width, int height, Size areaSize) { //Guard.MustBeBetweenOrEqualTo(width, 1, 8, nameof(width)); //Guard.MustBeBetweenOrEqualTo(height, 1, 8, nameof(width)); @@ -122,10 +128,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } uint byteWidth = (uint)(width * Unsafe.SizeOf()); - int remainderXCount = 8 - width; + int remainderXCount = areaSize.Width - width; ref byte blockStart = ref MemoryMarshal.GetReference(MemoryMarshal.Cast(dest)); - int rowSizeInBytes = 8 * Unsafe.SizeOf(); + int rowSizeInBytes = areaSize.Width * Unsafe.SizeOf(); for (int y = 0; y < height; y++) { @@ -144,7 +150,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - int remainderYCount = 8 - height; + int remainderYCount = areaSize.Height - height; if (remainderYCount == 0) { From 5ed7e2d1b734c57148e1f6253aee45ae944f9c14 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 01:09:41 +0300 Subject: [PATCH 08/23] Added quality params to the jpeg encoder benchmark --- tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs index e22259f764..e807c416bd 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs @@ -13,9 +13,10 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { public class EncodeJpeg { + [Params(50, 75, 95, 100)] + public int Quality; + private const string TestImage = TestImages.Jpeg.BenchmarkSuite.Jpeg420Exif_MidSizeYCbCr; - // GDI+ most likely uses 75 as default quality - https://stackoverflow.com/questions/3957477/what-quality-level-does-image-save-use-for-jpeg-files - private const int EncodingQuality = 75; // GDI+ uses 4:2:0 subsampling private const JpegSubsample EncodingSubsampling = JpegSubsample.Ratio420; @@ -41,14 +42,14 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg this.bmpCore = Image.Load(this.bmpStream); this.bmpCore.Metadata.ExifProfile = null; - this.encoder = new JpegEncoder { Quality = EncodingQuality, Subsample = EncodingSubsampling }; + this.encoder = new JpegEncoder { Quality = Quality, Subsample = EncodingSubsampling }; this.bmpStream.Position = 0; this.bmpDrawing = SDImage.FromStream(this.bmpStream); this.jpegCodec = GetEncoder(ImageFormat.Jpeg); this.encoderParameters = new EncoderParameters(1); // Quality cast to long is necessary - this.encoderParameters.Param[0] = new EncoderParameter(Encoder.Quality, (long)EncodingQuality); + this.encoderParameters.Param[0] = new EncoderParameter(Encoder.Quality, (long)Quality); this.destinationStream = new MemoryStream(); } From d6db6b6be75dbc73dbb238cc02c6fcca31131d0c Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 01:23:09 +0300 Subject: [PATCH 09/23] Fixed compilation errors for non-intrinsic platforms --- .../Encoder/RgbToYCbCrConverterVectorized.cs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index a44b174d89..e5fe4dea2f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -125,8 +125,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices with 4:2:0 subsampling /// /// Total size of rgb span must be 200 bytes - /// Span of rgb pixels with size of 64 - /// 8x8 destination matrix of Luminance(Y) converted dataф public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock, int idx) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -207,12 +205,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Converts 16x8 Rgb24 pixels matrix to 2 Y 8x8 matrices with 4:2:0 subsampling /// - /// - /// - /// - /// - /// - /// public static void Convert420_16x8(ReadOnlySpan rgbSpan, Span yBlocks, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -286,6 +278,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } +#if SUPPORTS_RUNTIME_INTRINSICS [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Scale_8x4_4x2(Span> v) { @@ -335,5 +328,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); } } +#endif } } From 39569866fc022d08e431dd11c4eda5b9985b40f8 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 11:43:32 +0300 Subject: [PATCH 10/23] Added debug guard checks to LoadAndStretchEdges --- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 120b21e107..a059f978d1 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -115,17 +115,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - // TODO: add DebugGuard checks? + private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int width, int height, Size areaSize) { - //Guard.MustBeBetweenOrEqualTo(width, 1, 8, nameof(width)); - //Guard.MustBeBetweenOrEqualTo(height, 1, 8, nameof(width)); - - // TODO: this is a strange check, most likely it was introduces due to 2x 8x8 blocks subsampling, should be gone after new 4:2:0 implementation - if (width <= 0 || height <= 0) - { - return; - } + DebugGuard.MustBeBetweenOrEqualTo(width, 1, areaSize.Width, nameof(width)); + DebugGuard.MustBeBetweenOrEqualTo(height, 1, areaSize.Height, nameof(height)); uint byteWidth = (uint)(width * Unsafe.SizeOf()); int remainderXCount = areaSize.Width - width; From 0d94435d653d5dc9cf88e162182a7e3be84c15b1 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 12:55:23 +0300 Subject: [PATCH 11/23] Simplified LoadAndStretchEdges call logic --- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index a059f978d1..963e6dd9ea 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -76,7 +76,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) { Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, buffer.Width - x, buffer.Height - y, new Size(8)); + LoadAndStretchEdges(currentRows, this.pixelSpan, x, y, new Size(8), new Size(buffer.Width, buffer.Height)); PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); @@ -100,7 +100,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) { Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, Math.Min(16, buffer.Width - x), Math.Min(8, buffer.Height - y), new Size(16, 8)); + LoadAndStretchEdges(currentRows, this.pixelSpan, x, y, new Size(16, 8), new Size(buffer.Width, buffer.Height)); PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); @@ -116,10 +116,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } - private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int width, int height, Size areaSize) + private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int startY, Size areaSize, Size borders) { - DebugGuard.MustBeBetweenOrEqualTo(width, 1, areaSize.Width, nameof(width)); - DebugGuard.MustBeBetweenOrEqualTo(height, 1, areaSize.Height, nameof(height)); + int width = Math.Min(areaSize.Width, borders.Width - startX); + int height = Math.Min(areaSize.Height, borders.Height - startY); uint byteWidth = (uint)(width * Unsafe.SizeOf()); int remainderXCount = areaSize.Width - width; From 13e7cf358fb64b18aa06ba646f0d6feedac426fc Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 15:43:48 +0300 Subject: [PATCH 12/23] Divided YCbCr converters into 444/420 subsampling categories --- .../Components/Encoder/HuffmanScanEncoder.cs | 4 +- .../YCbCrForwardConverter444{TPixel}.cs | 118 +++++++++++++++++ .../Encoder/YCbCrForwardConverter{TPixel}.cs | 122 ++---------------- 3 files changed, 130 insertions(+), 114 deletions(-) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index f6e55153aa..4fbd9e4ecb 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -71,7 +71,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; - var pixelConverter = YCbCrForwardConverter.Create(); + var pixelConverter = YCbCrForwardConverter444.Create(); ImageFrame frame = pixels.Frames.RootFrame; Buffer2D pixelBuffer = frame.PixelBuffer; RowOctet currentRows = default; @@ -125,7 +125,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { var unzig = ZigZag.CreateUnzigTable(); - var pixelConverter = YCbCrForwardConverter.Create(); + var pixelConverter = YCbCrForwardConverter444.Create(); // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs new file mode 100644 index 0000000000..58bb1d559d --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -0,0 +1,118 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder +{ + /// + /// On-stack worker struct to efficiently encapsulate the TPixel -> Rgb24 -> YCbCr conversion chain of 8x8 pixel blocks. + /// + /// The pixel type to work on + internal ref struct YCbCrForwardConverter444 + where TPixel : unmanaged, IPixel + { + /// + /// The Y component + /// + public Block8x8F Y; + + /// + /// The Cb component + /// + public Block8x8F Cb; + + /// + /// The Cr component + /// + public Block8x8F Cr; + + /// + /// The color conversion tables + /// + private RgbToYCbCrConverterLut colorTables; + + /// + /// Temporal 8x8 block to hold TPixel data + /// + private Span pixelSpan; + + /// + /// Temporal RGB block + /// + private Span rgbSpan; + + public Span twinBlocksY; + + public static YCbCrForwardConverter444 Create() + { + var result = default(YCbCrForwardConverter444); + + // creating rgb pixel bufferr + // TODO: this is subject to discuss + const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding + result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); + // TODO: this size should be configurable + result.pixelSpan = new TPixel[128].AsSpan(); + + result.twinBlocksY = new Block8x8F[2].AsSpan(); + + // Avoid creating lookup tables, when vectorized converter is supported + if (!RgbToYCbCrConverterVectorized.IsSupported) + { + result.colorTables = RgbToYCbCrConverterLut.Create(); + } + + return result; + } + + /// + /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) + /// + public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) + { + Memory.Buffer2D buffer = frame.PixelBuffer; + YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), new Size(8), new Size(buffer.Width, buffer.Height)); + + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); + + ref Block8x8F yBlock = ref this.Y; + ref Block8x8F cbBlock = ref this.Cb; + ref Block8x8F crBlock = ref this.Cr; + + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + } + else + { + this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + } + } + + /// + /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) + /// + public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) + { + Memory.Buffer2D buffer = frame.PixelBuffer; + YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), new Size(16, 8), new Size(buffer.Width, buffer.Height)); + + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); + + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.Convert420_16x8(this.rgbSpan, this.twinBlocksY, ref this.Cb, ref this.Cr, idx); + } + else + { + throw new NotSupportedException("This is not yet implemented"); + //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 963e6dd9ea..f5ef770914 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -4,134 +4,32 @@ using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { - /// - /// On-stack worker struct to efficiently encapsulate the TPixel -> Rgb24 -> YCbCr conversion chain of 8x8 pixel blocks. - /// - /// The pixel type to work on - internal ref struct YCbCrForwardConverter + internal static class YCbCrForwardConverter where TPixel : unmanaged, IPixel { - /// - /// The Y component - /// - public Block8x8F Y; - - /// - /// The Cb component - /// - public Block8x8F Cb; - - /// - /// The Cr component - /// - public Block8x8F Cr; - - /// - /// The color conversion tables - /// - private RgbToYCbCrConverterLut colorTables; - - /// - /// Temporal 8x8 block to hold TPixel data - /// - private Span pixelSpan; - - /// - /// Temporal RGB block - /// - private Span rgbSpan; - - public Span twinBlocksY; - - public static YCbCrForwardConverter Create() - { - var result = default(YCbCrForwardConverter); - - // creating rgb pixel bufferr - // TODO: this is subject to discuss - const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding - result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); - // TODO: this size should be configurable - result.pixelSpan = new TPixel[128].AsSpan(); - - result.twinBlocksY = new Block8x8F[2].AsSpan(); - - // Avoid creating lookup tables, when vectorized converter is supported - if (!RgbToYCbCrConverterVectorized.IsSupported) - { - result.colorTables = RgbToYCbCrConverterLut.Create(); - } - - return result; - } - - /// - /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) - /// - public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) + public static void LoadAndStretchEdges(RowOctet source, Span dest, Point start, Size sampleSize, Size totalSize) { - Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, y, new Size(8), new Size(buffer.Width, buffer.Height)); - - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); - - ref Block8x8F yBlock = ref this.Y; - ref Block8x8F cbBlock = ref this.Cb; - ref Block8x8F crBlock = ref this.Cr; + DebugGuard.MustBeBetweenOrEqualTo(start.X, 1, totalSize.Width - 1, nameof(start.X)); + DebugGuard.MustBeBetweenOrEqualTo(start.Y, 1, totalSize.Height - 1, nameof(start.Y)); - if (RgbToYCbCrConverterVectorized.IsSupported) - { - RgbToYCbCrConverterVectorized.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); - } - else - { - this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); - } - } - - /// - /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) - /// - public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) - { - Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, y, new Size(16, 8), new Size(buffer.Width, buffer.Height)); - - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); - - if (RgbToYCbCrConverterVectorized.IsSupported) - { - RgbToYCbCrConverterVectorized.Convert420_16x8(this.rgbSpan, this.twinBlocksY, ref this.Cb, ref this.Cr, idx); - } - else - { - throw new NotSupportedException("This is not yet implemented"); - //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); - } - } - - - private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int startY, Size areaSize, Size borders) - { - int width = Math.Min(areaSize.Width, borders.Width - startX); - int height = Math.Min(areaSize.Height, borders.Height - startY); + int width = Math.Min(sampleSize.Width, totalSize.Width - start.X); + int height = Math.Min(sampleSize.Height, totalSize.Height - start.Y); uint byteWidth = (uint)(width * Unsafe.SizeOf()); - int remainderXCount = areaSize.Width - width; + int remainderXCount = sampleSize.Width - width; ref byte blockStart = ref MemoryMarshal.GetReference(MemoryMarshal.Cast(dest)); - int rowSizeInBytes = areaSize.Width * Unsafe.SizeOf(); + int rowSizeInBytes = sampleSize.Width * Unsafe.SizeOf(); for (int y = 0; y < height; y++) { Span row = source[y]; - ref byte s = ref Unsafe.As(ref row[startX]); + ref byte s = ref Unsafe.As(ref row[start.X]); ref byte d = ref Unsafe.Add(ref blockStart, y * rowSizeInBytes); Unsafe.CopyBlock(ref d, ref s, byteWidth); @@ -144,7 +42,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - int remainderYCount = areaSize.Height - height; + int remainderYCount = sampleSize.Height - height; if (remainderYCount == 0) { From 12b4b83cb6df5499d0b2211ae8ddf4d6b7e88363 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 18:21:36 +0300 Subject: [PATCH 13/23] 444 converter fixes --- .../YCbCrForwardConverter444{TPixel}.cs | 42 ++++++------------- 1 file changed, 12 insertions(+), 30 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index 58bb1d559d..8fef553026 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -16,6 +16,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder internal ref struct YCbCrForwardConverter444 where TPixel : unmanaged, IPixel { + // TODO: documentation + private const int RgbSpanByteSize = 8 * 8 * 3; + // TODO: documentation + private const int PixelSpanSize = 8 * 8; + + /// /// The Y component /// @@ -37,29 +43,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder private RgbToYCbCrConverterLut colorTables; /// - /// Temporal 8x8 block to hold TPixel data + /// Temporal 64-byte span to hold unconverted TPixel data /// private Span pixelSpan; /// - /// Temporal RGB block + /// Temporal 64-byte span to hold converted Rgb24 data /// private Span rgbSpan; - public Span twinBlocksY; - public static YCbCrForwardConverter444 Create() { var result = default(YCbCrForwardConverter444); // creating rgb pixel bufferr // TODO: this is subject to discuss - const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding - result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); - // TODO: this size should be configurable - result.pixelSpan = new TPixel[128].AsSpan(); + // converter.Convert comments for +8 padding + result.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + 8].AsSpan()); - result.twinBlocksY = new Block8x8F[2].AsSpan(); + // TODO: this is subject to discuss + result.pixelSpan = new TPixel[PixelSpanSize].AsSpan(); // Avoid creating lookup tables, when vectorized converter is supported if (!RgbToYCbCrConverterVectorized.IsSupported) @@ -93,26 +96,5 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } - - /// - /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) - /// - public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) - { - Memory.Buffer2D buffer = frame.PixelBuffer; - YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), new Size(16, 8), new Size(buffer.Width, buffer.Height)); - - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); - - if (RgbToYCbCrConverterVectorized.IsSupported) - { - RgbToYCbCrConverterVectorized.Convert420_16x8(this.rgbSpan, this.twinBlocksY, ref this.Cb, ref this.Cr, idx); - } - else - { - throw new NotSupportedException("This is not yet implemented"); - //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); - } - } } } From 953095f1b981a59372bfc7b7c7c94ce8d4d68002 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 18:52:03 +0300 Subject: [PATCH 14/23] 420 converter fixes --- .../Components/Encoder/HuffmanScanEncoder.cs | 10 +++--- .../Encoder/RgbToYCbCrConverterVectorized.cs | 35 +++++++++++++++++-- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 4fbd9e4ecb..3231c5781e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -125,7 +125,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { var unzig = ZigZag.CreateUnzigTable(); - var pixelConverter = YCbCrForwardConverter444.Create(); + var pixelConverter = YCbCrForwardConverter420.Create(); // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; @@ -138,23 +138,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder cancellationToken.ThrowIfCancellationRequested(); for (int x = 0; x < pixels.Width; x += 16) { - for(int i = 0; i < 2; i++) + for (int i = 0; i < 2; i++) { int yOff = i * 8; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert420(frame, x, y, ref currentRows, i); + pixelConverter.Convert(frame, x, y, ref currentRows, i); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref pixelConverter.twinBlocksY[0], + ref pixelConverter.YLeft, ref luminanceQuantTable, ref unzig); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref pixelConverter.twinBlocksY[1], + ref pixelConverter.YRight, ref luminanceQuantTable, ref unzig); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index e5fe4dea2f..cf4d477749 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -28,6 +28,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } #if SUPPORTS_RUNTIME_INTRINSICS + // TODO: documentation + public const int AvxRegisterRgbCompatibilityOffset = 8; + private static ReadOnlySpan MoveFirst24BytesToSeparateLanes => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, @@ -205,7 +208,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Converts 16x8 Rgb24 pixels matrix to 2 Y 8x8 matrices with 4:2:0 subsampling /// - public static void Convert420_16x8(ReadOnlySpan rgbSpan, Span yBlocks, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) + public static void Convert420_16x8(ReadOnlySpan rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -241,7 +244,33 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int i = 0; i < 4; i++) { // 16x2 => 8x1 - for (int j = 0; j < 4; j++) + // left 8x8 column conversions + for (int j = 0; j < 4; j += 2) + { + rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); + + rgb = Avx2.Shuffle(rgb, extractRgbMask); + + rg = Avx2.UnpackLow(rgb, zero); + bx = Avx2.UnpackHigh(rgb, zero); + + r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); + g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); + b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); + + int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1); + + // (0.299F * r) + (0.587F * g) + (0.114F * b); + Unsafe.Add(ref yBlockLeft.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); + + rDataLanes[j] = r; + gDataLanes[j] = g; + bDataLanes[j] = b; + } + + // 16x2 => 8x1 + // right 8x8 column conversions + for (int j = 1; j < 4; j += 2) { rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); @@ -257,7 +286,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1); // (0.299F * r) + (0.587F * g) + (0.114F * b); - Unsafe.Add(ref yBlocks[j & 1].V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); + Unsafe.Add(ref yBlockRight.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); rDataLanes[j] = r; gDataLanes[j] = g; From 5fc29a2e9899171878b6c703868f657c62f8e735 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 18:52:39 +0300 Subject: [PATCH 15/23] Introduced separate 420 converter --- .../YCbCrForwardConverter420{TPixel}.cs | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs new file mode 100644 index 0000000000..c831b611c8 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -0,0 +1,95 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder +{ + /// + /// On-stack worker struct to efficiently encapsulate the TPixel -> Rgb24 -> YCbCr conversion chain of 8x8 pixel blocks. + /// + /// The pixel type to work on + internal ref struct YCbCrForwardConverter420 + where TPixel : unmanaged, IPixel + { + /// + /// The left Y component + /// + public Block8x8F YLeft; + + /// + /// The left Y component + /// + public Block8x8F YRight; + + /// + /// The Cb component + /// + public Block8x8F Cb; + + /// + /// The Cr component + /// + public Block8x8F Cr; + + /// + /// The color conversion tables + /// + private RgbToYCbCrConverterLut colorTables; + + /// + /// Temporal 16x8 block to hold TPixel data + /// + private Span pixelSpan; + + /// + /// Temporal RGB block + /// + private Span rgbSpan; + + public static YCbCrForwardConverter420 Create() + { + var result = default(YCbCrForwardConverter420); + + // TODO: this is subject to discuss + const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding + result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); + + // TODO: this size should be configurable + result.pixelSpan = new TPixel[128].AsSpan(); + + // Avoid creating lookup tables, when vectorized converter is supported + if (!RgbToYCbCrConverterVectorized.IsSupported) + { + result.colorTables = RgbToYCbCrConverterLut.Create(); + } + + return result; + } + + /// + /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) + /// + public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) + { + Memory.Buffer2D buffer = frame.PixelBuffer; + YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), new Size(16, 8), new Size(buffer.Width, buffer.Height)); + + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); + + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.Convert420_16x8(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx); + } + else + { + throw new NotSupportedException("This is not yet implemented"); + //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + } + } + } +} From cb1acaec78c92688774f7245c6ae7345a2aeda6a Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 22:30:45 +0300 Subject: [PATCH 16/23] Finished 420 subsampling converter --- .../Components/Encoder/HuffmanScanEncoder.cs | 6 +- .../Encoder/RgbToYCbCrConverterVectorized.cs | 16 +++++- .../YCbCrForwardConverter420{TPixel}.cs | 55 +++++++++++++------ 3 files changed, 53 insertions(+), 24 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 3231c5781e..283a98fab6 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -125,14 +125,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { var unzig = ZigZag.CreateUnzigTable(); - var pixelConverter = YCbCrForwardConverter420.Create(); - // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; ImageFrame frame = pixels.Frames.RootFrame; Buffer2D pixelBuffer = frame.PixelBuffer; RowOctet currentRows = default; + var pixelConverter = new YCbCrForwardConverter420(frame); + for (int y = 0; y < pixels.Height; y += 16) { cancellationToken.ThrowIfCancellationRequested(); @@ -142,7 +142,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { int yOff = i * 8; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert(frame, x, y, ref currentRows, i); + pixelConverter.Convert(x, y, ref currentRows, i); prevDCY = this.WriteBlock( QuantIndex.Luminance, diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index cf4d477749..b9f0fa4271 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -27,9 +27,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } + public static int AvxRegisterRgbCompatibilityPadding + { + get + { + if (IsSupported) + { + return 8; + } + + return 0; + } + } + #if SUPPORTS_RUNTIME_INTRINSICS - // TODO: documentation - public const int AvxRegisterRgbCompatibilityOffset = 8; private static ReadOnlySpan MoveFirst24BytesToSeparateLanes => new byte[] { @@ -306,7 +317,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder #endif } - #if SUPPORTS_RUNTIME_INTRINSICS [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Scale_8x4_4x2(Span> v) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs index c831b611c8..fdb41a8e20 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -16,6 +16,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder internal ref struct YCbCrForwardConverter420 where TPixel : unmanaged, IPixel { + // TODO: docs + private const int PixelsPerSample = 16 * 8; + + // TODO: docs + private static int RgbSpanByteSize = PixelsPerSample * 3; + + // TODO: docs + private static readonly Size SampleSize = new Size(16, 8); + /// /// The left Y component /// @@ -51,35 +60,45 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private Span rgbSpan; - public static YCbCrForwardConverter420 Create() + // TODO: docs + private Size samplingAreaSize; + + // TODO: docs + private Configuration config; + + + public YCbCrForwardConverter420(ImageFrame frame) { - var result = default(YCbCrForwardConverter420); + // matrices would be filled during convert calls + this.YLeft = default; + this.YRight = default; + this.Cb = default; + this.Cr = default; - // TODO: this is subject to discuss - const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding - result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); + // temporal pixel buffers + this.pixelSpan = new TPixel[PixelsPerSample].AsSpan(); + this.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxRegisterRgbCompatibilityPadding].AsSpan()); - // TODO: this size should be configurable - result.pixelSpan = new TPixel[128].AsSpan(); + // frame data + this.samplingAreaSize = new Size(frame.Width, frame.Height); + this.config = frame.GetConfiguration(); - // Avoid creating lookup tables, when vectorized converter is supported + // conversion vector fallback data if (!RgbToYCbCrConverterVectorized.IsSupported) { - result.colorTables = RgbToYCbCrConverterLut.Create(); + this.colorTables = RgbToYCbCrConverterLut.Create(); + } + else + { + this.colorTables = default; } - - return result; } - /// - /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) - /// - public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) + public void Convert(int x, int y, ref RowOctet currentRows, int idx) { - Memory.Buffer2D buffer = frame.PixelBuffer; - YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), new Size(16, 8), new Size(buffer.Width, buffer.Height)); + YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), SampleSize, this.samplingAreaSize); - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); + PixelOperations.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan); if (RgbToYCbCrConverterVectorized.IsSupported) { From 672da457d340b2ae6df50d880dfdba0f12c9e2ec Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 22:44:09 +0300 Subject: [PATCH 17/23] Finished 444 subsampling converter --- .../Components/Encoder/HuffmanScanEncoder.cs | 5 +- .../YCbCrForwardConverter444{TPixel}.cs | 53 +++++++++++++++---- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 283a98fab6..218b2b59c3 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -71,11 +71,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; - var pixelConverter = YCbCrForwardConverter444.Create(); ImageFrame frame = pixels.Frames.RootFrame; Buffer2D pixelBuffer = frame.PixelBuffer; RowOctet currentRows = default; + var pixelConverter = new YCbCrForwardConverter444(frame); + for (int y = 0; y < pixels.Height; y += 8) { cancellationToken.ThrowIfCancellationRequested(); @@ -83,7 +84,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int x = 0; x < pixels.Width; x += 8) { - pixelConverter.Convert(frame, x, y, ref currentRows); + pixelConverter.Convert(x, y, ref currentRows); prevDCY = this.WriteBlock( QuantIndex.Luminance, diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index 8fef553026..27f7e3ae9c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -16,10 +16,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder internal ref struct YCbCrForwardConverter444 where TPixel : unmanaged, IPixel { - // TODO: documentation - private const int RgbSpanByteSize = 8 * 8 * 3; - // TODO: documentation - private const int PixelSpanSize = 8 * 8; + // TODO: docs + private const int PixelsPerSample = 8 * 8; + + // TODO: docs + private const int RgbSpanByteSize = PixelsPerSample * 3; + + // TODO: docs + private static readonly Size SampleSize = new Size(8, 8); /// @@ -52,6 +56,38 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private Span rgbSpan; + // TODO: docs + private Size samplingAreaSize; + + // TODO: docs + private readonly Configuration config; + + public YCbCrForwardConverter444(ImageFrame frame) + { + // matrices would be filled during convert calls + this.Y = default; + this.Cb = default; + this.Cr = default; + + // temporal pixel buffers + this.pixelSpan = new TPixel[PixelsPerSample].AsSpan(); + this.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxRegisterRgbCompatibilityPadding].AsSpan()); + + // frame data + this.samplingAreaSize = new Size(frame.Width, frame.Height); + this.config = frame.GetConfiguration(); + + // conversion vector fallback data + if (!RgbToYCbCrConverterVectorized.IsSupported) + { + this.colorTables = RgbToYCbCrConverterLut.Create(); + } + else + { + this.colorTables = default; + } + } + public static YCbCrForwardConverter444 Create() { var result = default(YCbCrForwardConverter444); @@ -62,7 +98,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder result.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + 8].AsSpan()); // TODO: this is subject to discuss - result.pixelSpan = new TPixel[PixelSpanSize].AsSpan(); + result.pixelSpan = new TPixel[PixelsPerSample].AsSpan(); // Avoid creating lookup tables, when vectorized converter is supported if (!RgbToYCbCrConverterVectorized.IsSupported) @@ -76,12 +112,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) /// - public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) + public void Convert(int x, int y, ref RowOctet currentRows) { - Memory.Buffer2D buffer = frame.PixelBuffer; - YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), new Size(8), new Size(buffer.Width, buffer.Height)); + YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), SampleSize, this.samplingAreaSize); - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); + PixelOperations.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan); ref Block8x8F yBlock = ref this.Y; ref Block8x8F cbBlock = ref this.Cb; From de176b699e377ce4da7f005c66a9351d77b8eed1 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 3 Jun 2021 17:35:23 +0300 Subject: [PATCH 18/23] Initial 420 subsampling lut conversion implementation --- .../Encoder/RgbToYCbCrConverterLut.cs | 90 +++++++++++++++++++ .../YCbCrForwardConverter420{TPixel}.cs | 3 +- 2 files changed, 91 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 1ceea1e08a..635e571b7d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -115,6 +115,34 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConvertPixelInto( + int r, + int g, + int b, + ref Block8x8F yResult, + int i) + { + // float y = (0.299F * r) + (0.587F * g) + (0.114F * b); + yResult[i] = (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConvertPixelInto( + int r, + int g, + int b, + ref Block8x8F cbResult, + ref Block8x8F crResult, + int i) + { + // float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)); + cbResult[i] = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits; + + // float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)); + crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; + } + public void Convert(Span rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) { ref Rgb24 rgbStart = ref rgbSpan[0]; @@ -134,6 +162,68 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } + public void Convert(Span rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) + { + ref Rgb24 rgbStart = ref rgbSpan[0]; + for (int i = 0; i < 8; i += 2) + { + Span r = stackalloc int[8]; + Span g = stackalloc int[8]; + Span b = stackalloc int[8]; + + for (int j = 0; j < 2; j++) + { + // left + ref Rgb24 stride = ref Unsafe.Add(ref rgbStart, (i + j) * 16); + for (int k = 0; k < 8; k += 2) + { + int r0 = Unsafe.Add(ref stride, k).R; + int g0 = Unsafe.Add(ref stride, k).G; + int b0 = Unsafe.Add(ref stride, k).B; + this.ConvertPixelInto(r0, g0, b0, ref yBlockLeft, (i + j) * 8 + k); + + int r1 = Unsafe.Add(ref stride, k + 1).R; + int g1 = Unsafe.Add(ref stride, k + 1).G; + int b1 = Unsafe.Add(ref stride, k + 1).B; + this.ConvertPixelInto(r1, g1, b1, ref yBlockLeft, (i + j) * 8 + k + 1); + + int idx = k / 2; + r[idx] += r0 + r1; + g[idx] += g0 + g1; + b[idx] += b0 + b1; + } + + // right + stride = ref Unsafe.Add(ref stride, 8); + for (int k = 0; k < 8; k += 2) + { + int r0 = Unsafe.Add(ref stride, k).R; + int g0 = Unsafe.Add(ref stride, k).G; + int b0 = Unsafe.Add(ref stride, k).B; + this.ConvertPixelInto(r0, g0, b0, ref yBlockRight, (i + j) * 8 + k); + + int r1 = Unsafe.Add(ref stride, k + 1).R; + int g1 = Unsafe.Add(ref stride, k + 1).G; + int b1 = Unsafe.Add(ref stride, k + 1).B; + this.ConvertPixelInto(r1, g1, b1, ref yBlockRight, (i + j) * 8 + k + 1); + + int idx = 4 + (k / 2); + r[idx] += r0 + r1; + g[idx] += g0 + g1; + b[idx] += b0 + b1; + } + } + + int writeIdx = + row * Block8x8F.Size / 2 // upper or lower part + + (i / 2) * 8; // which row + for (int j = 0; j < 8; j++) + { + this.ConvertPixelInto(r[j] / 4, g[j] / 4, b[j] / 4, ref cbBlock, ref crBlock, writeIdx + j); + } + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int Fix(float x) => (int)((x * (1L << ScaleBits)) + 0.5F); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs index fdb41a8e20..2e8433cdc6 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -106,8 +106,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } else { - throw new NotSupportedException("This is not yet implemented"); - //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + this.colorTables.Convert(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx); } } } From 7896e24606ba15500e43bcdaa856cebee9e42b67 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 4 Jun 2021 13:47:10 +0300 Subject: [PATCH 19/23] Improved non-simd ycbcr lut converter code --- .../Encoder/RgbToYCbCrConverterLut.cs | 37 ++++++++----------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 635e571b7d..06e8f26b69 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -177,40 +177,33 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref Rgb24 stride = ref Unsafe.Add(ref rgbStart, (i + j) * 16); for (int k = 0; k < 8; k += 2) { - int r0 = Unsafe.Add(ref stride, k).R; - int g0 = Unsafe.Add(ref stride, k).G; - int b0 = Unsafe.Add(ref stride, k).B; - this.ConvertPixelInto(r0, g0, b0, ref yBlockLeft, (i + j) * 8 + k); + Rgb24 px0 = Unsafe.Add(ref stride, k); + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockLeft, (i + j) * 8 + k); - int r1 = Unsafe.Add(ref stride, k + 1).R; - int g1 = Unsafe.Add(ref stride, k + 1).G; - int b1 = Unsafe.Add(ref stride, k + 1).B; - this.ConvertPixelInto(r1, g1, b1, ref yBlockLeft, (i + j) * 8 + k + 1); + Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockLeft, (i + j) * 8 + k + 1); int idx = k / 2; - r[idx] += r0 + r1; - g[idx] += g0 + g1; - b[idx] += b0 + b1; + r[idx] += px0.R + px1.R; + g[idx] += px0.G + px1.G; + b[idx] += px0.B + px1.B; } // right stride = ref Unsafe.Add(ref stride, 8); for (int k = 0; k < 8; k += 2) { - int r0 = Unsafe.Add(ref stride, k).R; - int g0 = Unsafe.Add(ref stride, k).G; - int b0 = Unsafe.Add(ref stride, k).B; - this.ConvertPixelInto(r0, g0, b0, ref yBlockRight, (i + j) * 8 + k); + Rgb24 px0 = Unsafe.Add(ref stride, k); + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRight, (i + j) * 8 + k); - int r1 = Unsafe.Add(ref stride, k + 1).R; - int g1 = Unsafe.Add(ref stride, k + 1).G; - int b1 = Unsafe.Add(ref stride, k + 1).B; - this.ConvertPixelInto(r1, g1, b1, ref yBlockRight, (i + j) * 8 + k + 1); + Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockRight, (i + j) * 8 + k + 1); int idx = 4 + (k / 2); - r[idx] += r0 + r1; - g[idx] += g0 + g1; - b[idx] += b0 + b1; + r[idx] += px0.R + px1.R; + g[idx] += px0.G + px1.G; + b[idx] += px0.B + px1.B; + } } From 2e25a3ee34ca3c21c9ade0a5c3c11131167a319b Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 4 Jun 2021 14:16:32 +0300 Subject: [PATCH 20/23] Optimized non-simd ycbcr lut converter code --- .../Encoder/RgbToYCbCrConverterLut.cs | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 06e8f26b69..e26e73044b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -167,9 +167,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref Rgb24 rgbStart = ref rgbSpan[0]; for (int i = 0; i < 8; i += 2) { - Span r = stackalloc int[8]; - Span g = stackalloc int[8]; - Span b = stackalloc int[8]; + Span rgbTriplets = stackalloc int[24]; // 8 pixels by 3 integers for (int j = 0; j < 2; j++) { @@ -183,10 +181,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder Rgb24 px1 = Unsafe.Add(ref stride, k + 1); this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockLeft, (i + j) * 8 + k + 1); - int idx = k / 2; - r[idx] += px0.R + px1.R; - g[idx] += px0.G + px1.G; - b[idx] += px0.B + px1.B; + int idx = 3 * (k / 2); + rgbTriplets[idx] += px0.R + px1.R; + rgbTriplets[idx + 1] += px0.G + px1.G; + rgbTriplets[idx + 2] += px0.B + px1.B; } // right @@ -199,10 +197,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder Rgb24 px1 = Unsafe.Add(ref stride, k + 1); this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockRight, (i + j) * 8 + k + 1); - int idx = 4 + (k / 2); - r[idx] += px0.R + px1.R; - g[idx] += px0.G + px1.G; - b[idx] += px0.B + px1.B; + int idx = 3 * (4 + (k / 2)); + rgbTriplets[idx] += px0.R + px1.R; + rgbTriplets[idx + 1] += px0.G + px1.G; + rgbTriplets[idx + 2] += px0.B + px1.B; } } @@ -212,7 +210,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder + (i / 2) * 8; // which row for (int j = 0; j < 8; j++) { - this.ConvertPixelInto(r[j] / 4, g[j] / 4, b[j] / 4, ref cbBlock, ref crBlock, writeIdx + j); + int idx = j * 3; + this.ConvertPixelInto(rgbTriplets[idx] / 4, rgbTriplets[idx + 1] / 4, rgbTriplets[idx + 2] / 4, ref cbBlock, ref crBlock, writeIdx + j); } } } From 44bae0b79e8ee83dbbf5533c32f2eb34a33de490 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 4 Jun 2021 16:50:07 +0300 Subject: [PATCH 21/23] Made non-simd ycbcr lut converter code more readable --- .../Encoder/RgbToYCbCrConverterLut.cs | 54 ++++++++++++------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index e26e73044b..18f5ee0e70 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -128,21 +128,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void ConvertPixelInto( - int r, - int g, - int b, - ref Block8x8F cbResult, - ref Block8x8F crResult, - int i) + private void ConvertPixelInto(int r, int g, int b, ref float yResult) => + // float y = (0.299F * r) + (0.587F * g) + (0.114F * b); + yResult = (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConvertPixelInto(int r, int g, int b, ref float cbResult, ref float crResult) { // float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)); - cbResult[i] = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits; + cbResult = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits; // float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)); - crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; + crResult = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; } + public void Convert(Span rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) { ref Rgb24 rgbStart = ref rgbSpan[0]; @@ -164,10 +164,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Convert(Span rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) { + ref float yBlockLeftRef = ref Unsafe.As(ref yBlockLeft); + ref float yBlockRightRef = ref Unsafe.As(ref yBlockRight); + + // 0-31 or 32-63 + // upper or lower part + int chromaWriteOffset = row * Block8x8F.Size / 2; + ref float cbBlockRef = ref Unsafe.Add(ref Unsafe.As(ref cbBlock), chromaWriteOffset); + ref float crBlockRef = ref Unsafe.Add(ref Unsafe.As(ref crBlock), chromaWriteOffset); + ref Rgb24 rgbStart = ref rgbSpan[0]; + for (int i = 0; i < 8; i += 2) { - Span rgbTriplets = stackalloc int[24]; // 8 pixels by 3 integers + // 8 pixels by 3 integers + Span rgbTriplets = stackalloc int[24]; for (int j = 0; j < 2; j++) { @@ -175,11 +186,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref Rgb24 stride = ref Unsafe.Add(ref rgbStart, (i + j) * 16); for (int k = 0; k < 8; k += 2) { + ref float yBlockRef = ref Unsafe.Add(ref yBlockLeftRef, (i + j) * 8 + k); + Rgb24 px0 = Unsafe.Add(ref stride, k); - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockLeft, (i + j) * 8 + k); + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); Rgb24 px1 = Unsafe.Add(ref stride, k + 1); - this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockLeft, (i + j) * 8 + k + 1); + this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); int idx = 3 * (k / 2); rgbTriplets[idx] += px0.R + px1.R; @@ -191,11 +204,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder stride = ref Unsafe.Add(ref stride, 8); for (int k = 0; k < 8; k += 2) { + ref float yBlockRef = ref Unsafe.Add(ref yBlockRightRef, (i + j) * 8 + k); + Rgb24 px0 = Unsafe.Add(ref stride, k); - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRight, (i + j) * 8 + k); + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); Rgb24 px1 = Unsafe.Add(ref stride, k + 1); - this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockRight, (i + j) * 8 + k + 1); + this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); int idx = 3 * (4 + (k / 2)); rgbTriplets[idx] += px0.R + px1.R; @@ -205,13 +220,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - int writeIdx = - row * Block8x8F.Size / 2 // upper or lower part - + (i / 2) * 8; // which row + int writeIdx = 8 * (i / 2); for (int j = 0; j < 8; j++) { int idx = j * 3; - this.ConvertPixelInto(rgbTriplets[idx] / 4, rgbTriplets[idx + 1] / 4, rgbTriplets[idx + 2] / 4, ref cbBlock, ref crBlock, writeIdx + j); + this.ConvertPixelInto( + rgbTriplets[idx] / 4, // r + rgbTriplets[idx + 1] / 4, // g + rgbTriplets[idx + 2] / 4, // b + ref Unsafe.Add(ref cbBlockRef, writeIdx + j), + ref Unsafe.Add(ref crBlockRef, writeIdx + j)); } } } From 078703b595ecf204db96c34220b1d23ca9499b8a Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 4 Jun 2021 17:28:57 +0300 Subject: [PATCH 22/23] Added docs, renamed LuT converter for 444 and 420 subsampling methods, added debug guards --- .../Encoder/RgbToYCbCrConverterLut.cs | 32 +++++++++++++++---- .../YCbCrForwardConverter420{TPixel}.cs | 2 +- .../YCbCrForwardConverter444{TPixel}.cs | 2 +- .../Encoder/YCbCrForwardConverterBenchmark.cs | 2 +- .../Formats/Jpg/RgbToYCbCrConverterTests.cs | 2 +- 5 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 18f5ee0e70..3706b80622 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -142,8 +142,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder crResult = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; } - - public void Convert(Span rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) + /// + /// Converts Rgb24 pixels into YCbCr color space with 4:4:4 subsampling sampling of luminance and chroma. + /// + /// Span of Rgb24 pixel data + /// Resulting Y values block + /// Resulting Cb values block + /// Resulting Cr values block + public void Convert444(Span rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) { ref Rgb24 rgbStart = ref rgbSpan[0]; @@ -162,8 +168,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - public void Convert(Span rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) + /// + /// Converts Rgb24 pixels into YCbCr color space with 4:2:0 subsampling of luminance and chroma. + /// + /// Calculates 2 out of 4 luminance blocks and half of chroma blocks. This method must be called twice per 4x 8x8 DCT blocks with different row param. + /// Span of Rgb24 pixel data + /// First or "left" resulting Y block + /// Second or "right" resulting Y block + /// Resulting Cb values block + /// Resulting Cr values block + /// Row index of the 16x16 block, 0 or 1 + public void Convert420(Span rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) { + DebugGuard.MustBeBetweenOrEqualTo(row, 0, 1, nameof(row)); + ref float yBlockLeftRef = ref Unsafe.As(ref yBlockLeft); ref float yBlockRightRef = ref Unsafe.As(ref yBlockRight); @@ -189,9 +207,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref float yBlockRef = ref Unsafe.Add(ref yBlockLeftRef, (i + j) * 8 + k); Rgb24 px0 = Unsafe.Add(ref stride, k); - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); - Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); int idx = 3 * (k / 2); @@ -207,9 +225,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref float yBlockRef = ref Unsafe.Add(ref yBlockRightRef, (i + j) * 8 + k); Rgb24 px0 = Unsafe.Add(ref stride, k); - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); - Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); int idx = 3 * (4 + (k / 2)); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs index 2e8433cdc6..e0e7854b0d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -106,7 +106,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } else { - this.colorTables.Convert(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx); + this.colorTables.Convert420(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index 27f7e3ae9c..f3ae339347 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -128,7 +128,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } else { - this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + this.colorTables.Convert444(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } } diff --git a/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs b/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs index 1db4072932..60a5853847 100644 --- a/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs +++ b/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs @@ -37,7 +37,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Format.Jpeg.Components.Encoder Block8x8F cb = default; Block8x8F cr = default; - this.converter.Convert(this.data.AsSpan(), ref y, ref cb, ref cr); + this.converter.Convert444(this.data.AsSpan(), ref y, ref cb, ref cr); } [Benchmark] diff --git a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs index 9a6fc8d6fd..c605a6cf89 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs @@ -32,7 +32,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Block8x8F cb = default; Block8x8F cr = default; - target.Convert(data.AsSpan(), ref y, ref cb, ref cr); + target.Convert444(data.AsSpan(), ref y, ref cb, ref cr); Verify(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(1F)); } From da1b85bee38b4e4ceded1c57d25ac13a2a0e8f22 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 4 Jun 2021 18:04:58 +0300 Subject: [PATCH 23/23] Final cleanup of the non-simd 420 rgb -> ycbcr conversion code --- .../Encoder/RgbToYCbCrConverterLut.cs | 62 +++++++++---------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 3706b80622..7681063eef 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -200,45 +200,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int j = 0; j < 2; j++) { - // left + int yBlockWriteOffset = (i + j) * 8; ref Rgb24 stride = ref Unsafe.Add(ref rgbStart, (i + j) * 16); - for (int k = 0; k < 8; k += 2) - { - ref float yBlockRef = ref Unsafe.Add(ref yBlockLeftRef, (i + j) * 8 + k); - - Rgb24 px0 = Unsafe.Add(ref stride, k); - Rgb24 px1 = Unsafe.Add(ref stride, k + 1); - - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); - this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); - int idx = 3 * (k / 2); - rgbTriplets[idx] += px0.R + px1.R; - rgbTriplets[idx + 1] += px0.G + px1.G; - rgbTriplets[idx + 2] += px0.B + px1.B; - } + // left + this.ConvertChunk420(ref stride, ref Unsafe.Add(ref yBlockLeftRef, yBlockWriteOffset), rgbTriplets); // right - stride = ref Unsafe.Add(ref stride, 8); - for (int k = 0; k < 8; k += 2) - { - ref float yBlockRef = ref Unsafe.Add(ref yBlockRightRef, (i + j) * 8 + k); - - Rgb24 px0 = Unsafe.Add(ref stride, k); - Rgb24 px1 = Unsafe.Add(ref stride, k + 1); - - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); - this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); - - int idx = 3 * (4 + (k / 2)); - rgbTriplets[idx] += px0.R + px1.R; - rgbTriplets[idx + 1] += px0.G + px1.G; - rgbTriplets[idx + 2] += px0.B + px1.B; - - } + this.ConvertChunk420(ref Unsafe.Add(ref stride, 8), ref Unsafe.Add(ref yBlockRightRef, yBlockWriteOffset), rgbTriplets.Slice(12)); } int writeIdx = 8 * (i / 2); + ref float cbWriteRef = ref Unsafe.Add(ref cbBlockRef, writeIdx); + ref float crWriteRef = ref Unsafe.Add(ref crBlockRef, writeIdx); for (int j = 0; j < 8; j++) { int idx = j * 3; @@ -246,12 +220,32 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder rgbTriplets[idx] / 4, // r rgbTriplets[idx + 1] / 4, // g rgbTriplets[idx + 2] / 4, // b - ref Unsafe.Add(ref cbBlockRef, writeIdx + j), - ref Unsafe.Add(ref crBlockRef, writeIdx + j)); + ref Unsafe.Add(ref cbWriteRef, j), + ref Unsafe.Add(ref crWriteRef, j)); } } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConvertChunk420(ref Rgb24 stride, ref float yBlock, Span chromaRgbTriplet) + { + for (int k = 0; k < 8; k += 2) + { + ref float yBlockRef = ref Unsafe.Add(ref yBlock, k); + + Rgb24 px0 = Unsafe.Add(ref stride, k); + Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); + this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); + + int idx = 3 * (k / 2); + chromaRgbTriplet[idx] += px0.R + px1.R; + chromaRgbTriplet[idx + 1] += px0.G + px1.G; + chromaRgbTriplet[idx + 2] += px0.B + px1.B; + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int Fix(float x) => (int)((x * (1L << ScaleBits)) + 0.5F);