diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index a6ff21bdc9..62e82243cb 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -34,12 +34,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; - private static ReadOnlySpan MoveLast24BytesToSeparateLanes => new byte[] - { - 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, - 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0 - }; - private static ReadOnlySpan ExtractRgb => new byte[] { 0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF, @@ -102,7 +96,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // Extra mask & separate stride:7 calculations can be eliminated by simply providing rgb pixel span of slightly bigger size than pixels data need: // Total pixel data size is 192 bytes, avx registers need it to be 200 bytes const int bytesPerRgbStride = 24; - for (int i = 0; i < 7; i++) + for (int i = 0; i < 8; i++) { rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte(); @@ -124,26 +118,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); } - - extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveLast24BytesToSeparateLanes)); - rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte(); - rgb = Avx2.Shuffle(rgb, extractRgbMask); - - rg = Avx2.UnpackLow(rgb, zero); - bx = 
Avx2.UnpackHigh(rgb, zero); - - r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); - g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); - b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); - - // (0.299F * r) + (0.587F * g) + (0.114F * b); - Unsafe.Add(ref destYRef, 7) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); - - // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) - Unsafe.Add(ref destCbRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); - - // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) - Unsafe.Add(ref destCrRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); #endif } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 81e64b277b..ee4626b86a 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. 
using System; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; @@ -42,14 +43,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Temporal RGB block /// - private GenericBlock8x8 rgbBlock; + private Span rgbSpan; public static YCbCrForwardConverter Create() { var result = default(YCbCrForwardConverter); + + // creating rgb pixel buffer + // TODO: this is subject to discussion + result.rgbSpan = MemoryMarshal.Cast(new byte[200].AsSpan()); + + // Avoid creating lookup tables, when vectorized converter is supported if (!RgbToYCbCrConverterVectorized.IsSupported) { - // Avoid creating lookup tables, when vectorized converter is supported result.colorTables = RgbToYCbCrConverterLut.Create(); } @@ -63,8 +69,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); - Span rgbSpan = this.rgbBlock.AsSpanUnsafe(); - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), rgbSpan); + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); ref Block8x8F yBlock = ref this.Y; ref Block8x8F cbBlock = ref this.Cb; @@ -72,11 +77,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder if (RgbToYCbCrConverterVectorized.IsSupported) { - RgbToYCbCrConverterVectorized.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + RgbToYCbCrConverterVectorized.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } else { - this.colorTables.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } }