diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index 8c34baa1dc..ca7971a074 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -59,8 +59,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref Vector256 crBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); // Used for the color conversion var chromaOffset = Vector256.Create(-halfValue); @@ -76,14 +76,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskDeinterleave8x32); Vector256 vcontrol = Unsafe.As>(ref control); - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - - ref Vector256 rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector256 ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector256 bbRefAsVector = ref Unsafe.As>(ref bb); - // Walking 8 elements at one step: int n = result.Length / 8; for (int i = 0; i < n; i++) @@ -107,13 +99,46 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... 
order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.PackAvx2(ref rr, ref gg, ref bb, in valpha, in vcontrol); + // Collect (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the + // expected (r0,g0,b0,1), (r1,g1,b1,1) ... order: + // + // Left side. + Vector256 r0 = Avx.InsertVector128( + r, + Unsafe.As, Vector128>(ref g), + 1); + + Vector256 r1 = Avx.InsertVector128( + b, + valpha, + 1); + + // Right side + Vector256 r2 = Avx.InsertVector128( + Unsafe.Add(ref Unsafe.As, Vector128>(ref r), 1).ToVector256(), + Unsafe.Add(ref Unsafe.As, Vector128>(ref g), 1), + 1); + + Vector256 r3 = Avx.InsertVector128( + Unsafe.Add(ref Unsafe.As, Vector128>(ref b), 1).ToVector256(), + valpha, + 1); + + // Split into separate rows + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t2 = Avx.UnpackHigh(r0, r1); + + // Deinterleave and set + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + destination = Avx2.PermuteVar8x32(t0, vcontrol); + Unsafe.Add(ref destination, 1) = Avx2.PermuteVar8x32(t2, vcontrol); + + // Repeat for right side. 
+ Vector256 t4 = Avx.UnpackLow(r2, r3); + Vector256 t6 = Avx.UnpackHigh(r2, r3); + + Unsafe.Add(ref destination, 2) = Avx2.PermuteVar8x32(t4, vcontrol); + Unsafe.Add(ref destination, 3) = Avx2.PermuteVar8x32(t6, vcontrol); } #else ref Vector yBase = diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index b40d9b9e6e..7c780700c9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -4,12 +4,6 @@ using System; using System.Collections.Generic; using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -#endif using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Tuples; @@ -190,53 +184,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters #pragma warning disable SA1132 // Do not combine fields public Vector4 V0, V1, V2, V3, V4, V5, V6, V7; -#if SUPPORTS_RUNTIME_INTRINSICS - - /// - /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ... 
- /// - [MethodImpl(InliningOptions.ShortMethod)] - public void PackAvx2( - ref Vector4Pair r, - ref Vector4Pair g, - ref Vector4Pair b, - in Vector128 a, - in Vector256 vcontrol) - { - Vector256 r0 = Avx.InsertVector128( - Unsafe.As>(ref r.A).ToVector256(), - Unsafe.As>(ref g.A), - 1); - - Vector256 r1 = Avx.InsertVector128( - Unsafe.As>(ref b.A).ToVector256(), - a, - 1); - - Vector256 r2 = Avx.InsertVector128( - Unsafe.As>(ref r.B).ToVector256(), - Unsafe.As>(ref g.B), - 1); - - Vector256 r3 = Avx.InsertVector128( - Unsafe.As>(ref b.B).ToVector256(), - a, - 1); - - Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t2 = Avx.UnpackHigh(r0, r1); - - Unsafe.As>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol); - Unsafe.As>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol); - - Vector256 t4 = Avx.UnpackLow(r2, r3); - Vector256 t6 = Avx.UnpackHigh(r2, r3); - - Unsafe.As>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol); - Unsafe.As>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol); - } -#endif - /// /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ... ///