From f3c74547b47f023cbfca3fe60c5eb0286f7502d9 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 10:34:06 +0000 Subject: [PATCH] manually inline method --- .../Formats/Jpeg/Components/Block8x8F.cs | 105 +++++++++++++----- 1 file changed, 76 insertions(+), 29 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 342d12068..d814e5036 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -561,31 +561,99 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) { #if SUPPORTS_RUNTIME_INTRINSICS + + // Avx version is written inline to avoid JIT bugs on MacOS. if (Avx.IsSupported) { + // V0 + Vector vs = Unsafe.As>(ref a.V0L); + Vector voff + = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) + * OffsetAxv; + + Vector256 v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V0L)); + Unsafe.As>(ref a.V0L) - = DivideRoundAvx(ref a.V0L, ref b.V0L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V1 + vs = Unsafe.As>(ref a.V1L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V1L)); Unsafe.As>(ref a.V1L) - = DivideRoundAvx(ref a.V1L, ref b.V1L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V2 + vs = Unsafe.As>(ref a.V2L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V2L)); Unsafe.As>(ref a.V2L) - = DivideRoundAvx(ref a.V2L, ref b.V2L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V3 + vs = Unsafe.As>(ref a.V3L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V3L)); Unsafe.As>(ref
a.V3L) - = DivideRoundAvx(ref a.V3L, ref b.V3L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V4 + vs = Unsafe.As>(ref a.V4L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V4L)); Unsafe.As>(ref a.V4L) - = DivideRoundAvx(ref a.V4L, ref b.V4L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V5 + vs = Unsafe.As>(ref a.V5L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V5L)); Unsafe.As>(ref a.V5L) - = DivideRoundAvx(ref a.V5L, ref b.V5L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V6 + vs = Unsafe.As>(ref a.V6L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V6L)); Unsafe.As>(ref a.V6L) - = DivideRoundAvx(ref a.V6L, ref b.V6L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V7 + vs = Unsafe.As>(ref a.V7L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V7L)); Unsafe.As>(ref a.V7L) - = DivideRoundAvx(ref a.V7L, ref b.V7L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); } else #endif @@ -609,27 +677,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } } -#if SUPPORTS_RUNTIME_INTRINSICS - // [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 DivideRoundAvx( - ref Vector4 dividend, - ref Vector4 divisor) - { - Vector vdividend = Unsafe.As>(ref dividend); - - // sign(dividend) = max(min(dividend, 1), -1) - Vector offset - = Vector.Min(Vector.Max(NegativeOneAvx, vdividend), Vector.One) * OffsetAxv; - - // AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend) - Vector256 v = Avx.Divide( - Unsafe.As, Vector256>(ref vdividend), - Unsafe.As>(ref divisor)); - - return Avx.Add(v, 
Unsafe.As, Vector256>(ref offset)); - } -#endif - [MethodImpl(InliningOptions.ShortMethod)] private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor) {