From aab2837d28ff64f2e9ee92fd0fc49e3f41c5bbd6 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 23:16:26 +0100 Subject: [PATCH] More codegen improvements to shared methods --- .../ColorSpaces/Companding/SRgbCompanding.cs | 24 ++- src/ImageSharp/Common/Helpers/Numerics.cs | 171 ++++++++++-------- .../Utils/Vector4Converters.Default.cs | 50 ++--- 3 files changed, 138 insertions(+), 107 deletions(-) diff --git a/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs b/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs index 2e212ad19..9a8b5f0a8 100644 --- a/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs +++ b/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System; @@ -25,12 +25,14 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding [MethodImpl(InliningOptions.ShortMethod)] public static void Expand(Span vectors) { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Expand(ref v); + Expand(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } @@ -41,12 +43,14 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding [MethodImpl(InliningOptions.ShortMethod)] public static void Compress(Span vectors) { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref 
Unsafe.Add(ref baseRef, i); - Compress(ref v); + Compress(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } @@ -90,4 +94,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding [MethodImpl(InliningOptions.ShortMethod)] public static float Compress(float channel) => channel <= 0.0031308F ? 12.92F * channel : (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F; } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 56ab46c68..99d91168b 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -41,13 +41,11 @@ namespace SixLabors.ImageSharp /// /// Determine the Least Common Multiple (LCM) of two numbers. + /// See https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int LeastCommonMultiple(int a, int b) - { - // https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor - return (a / GreatestCommonDivisor(a, b)) * b; - } + => a / GreatestCommonDivisor(a, b) * b; /// /// Calculates % 2 @@ -290,10 +288,14 @@ namespace SixLabors.ImageSharp if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref byte remainderStart = ref MemoryMarshal.GetReference(remainder); + ref byte remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref byte v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -311,10 +313,14 @@ namespace SixLabors.ImageSharp if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref uint remainderStart = ref MemoryMarshal.GetReference(remainder); + ref uint remainderEnd = ref Unsafe.Add(ref 
remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref uint v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -332,10 +338,14 @@ namespace SixLabors.ImageSharp if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref int remainderStart = ref MemoryMarshal.GetReference(remainder); + ref int remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref int v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -353,10 +363,14 @@ namespace SixLabors.ImageSharp if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref float remainderStart = ref MemoryMarshal.GetReference(remainder); + ref float remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref float v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -374,10 +388,14 @@ namespace SixLabors.ImageSharp if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref double remainderStart = ref MemoryMarshal.GetReference(remainder); + ref double remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref double v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -472,10 +490,8 @@ namespace SixLabors.ImageSharp #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported && 
vectors.Length >= 2) { - ref Vector256 vectorsBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) @@ -495,12 +511,14 @@ namespace SixLabors.ImageSharp else #endif { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Premultiply(ref v); + Premultiply(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } } @@ -515,10 +533,8 @@ namespace SixLabors.ImageSharp #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported && vectors.Length >= 2) { - ref Vector256 vectorsBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) @@ -538,12 +554,14 @@ namespace SixLabors.ImageSharp else #endif { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - UnPremultiply(ref v); + 
UnPremultiply(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } } @@ -633,53 +651,54 @@ namespace SixLabors.ImageSharp vectors128Ref = y4; vectors128Ref = ref Unsafe.Add(ref vectors128Ref, 1); } - - return; } + else #endif - ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors); - ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length); - - // Fallback with scalar preprocessing and vectorized approximation steps - while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd)) { - Vector4 v = vectorsRef; - - double - x64 = v.X, - y64 = v.Y, - z64 = v.Z; - float a = v.W; + ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length); - ulong - xl = *(ulong*)&x64, - yl = *(ulong*)&y64, - zl = *(ulong*)&z64; - - // Here we use a trick to compute the starting value x0 for the cube root. This is because doing - // pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamme is 3 in this case, - // this means what we actually want is to find the cube root of our clamped values. - // For more info on the constant below, see: - // https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543. - // Here we perform the same trick on all RGB channels separately to help the CPU execute them in paralle, and - // store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit - // register, and use it to accelerate two steps of the Newton approximation using SIMD. 
- xl = 0x2a9f8a7be393b600 + (xl / 3); - yl = 0x2a9f8a7be393b600 + (yl / 3); - zl = 0x2a9f8a7be393b600 + (zl / 3); - - Vector4 y4; - y4.X = (float)*(double*)&xl; - y4.Y = (float)*(double*)&yl; - y4.Z = (float)*(double*)&zl; - y4.W = 0; - - y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); - y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); - y4.W = a; - - vectorsRef = y4; - vectorsRef = ref Unsafe.Add(ref vectorsRef, 1); + // Fallback with scalar preprocessing and vectorized approximation steps + while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd)) + { + Vector4 v = vectorsRef; + + double + x64 = v.X, + y64 = v.Y, + z64 = v.Z; + float a = v.W; + + ulong + xl = *(ulong*)&x64, + yl = *(ulong*)&y64, + zl = *(ulong*)&z64; + + // Here we use a trick to compute the starting value x0 for the cube root. This is because doing + // pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamma is 3 in this case, + // this means what we actually want is to find the cube root of our clamped values. + // For more info on the constant below, see: + // https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543. + // Here we perform the same trick on all RGB channels separately to help the CPU execute them in parallel, and + // store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit + // register, and use it to accelerate two steps of the Newton approximation using SIMD. 
+ xl = 0x2a9f8a7be393b600 + (xl / 3); + yl = 0x2a9f8a7be393b600 + (yl / 3); + zl = 0x2a9f8a7be393b600 + (zl / 3); + + Vector4 y4; + y4.X = (float)*(double*)&xl; + y4.Y = (float)*(double*)&yl; + y4.Z = (float)*(double*)&zl; + y4.W = 0; + + y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); + y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); + y4.W = a; + + vectorsRef = y4; + vectorsRef = ref Unsafe.Add(ref vectorsRef, 1); + } } } } diff --git a/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs b/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs index 999f6325b..6b6ff4319 100644 --- a/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs +++ b/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs @@ -88,14 +88,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils Span destPixels) where TPixel : unmanaged, IPixel { - ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceStart = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceStart, sourceVectors.Length); ref TPixel destRef = ref MemoryMarshal.GetReference(destPixels); - for (int i = 0; i < sourceVectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i); - ref TPixel dp = ref Unsafe.Add(ref destRef, i); - dp.FromVector4(sp); + destRef.FromVector4(sourceStart); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } @@ -105,14 +107,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils Span destVectors) where TPixel : unmanaged, IPixel { - ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourcePixels); + ref TPixel sourceStart = ref MemoryMarshal.GetReference(sourcePixels); + ref TPixel sourceEnd = ref Unsafe.Add(ref sourceStart, sourcePixels.Length); ref Vector4 destRef = ref MemoryMarshal.GetReference(destVectors); - for (int i = 0; i < sourcePixels.Length; 
i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref TPixel sp = ref Unsafe.Add(ref sourceRef, i); - ref Vector4 dp = ref Unsafe.Add(ref destRef, i); - dp = sp.ToVector4(); + destRef = sourceStart.ToVector4(); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } @@ -122,14 +126,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils Span destinationColors) where TPixel : unmanaged, IPixel { - ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceStart = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceStart, sourceVectors.Length); ref TPixel destRef = ref MemoryMarshal.GetReference(destinationColors); - for (int i = 0; i < sourceVectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i); - ref TPixel dp = ref Unsafe.Add(ref destRef, i); - dp.FromScaledVector4(sp); + destRef.FromScaledVector4(sourceStart); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } @@ -139,16 +145,18 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils Span destinationVectors) where TPixel : unmanaged, IPixel { - ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourceColors); + ref TPixel sourceStart = ref MemoryMarshal.GetReference(sourceColors); + ref TPixel sourceEnd = ref Unsafe.Add(ref sourceStart, sourceColors.Length); ref Vector4 destRef = ref MemoryMarshal.GetReference(destinationVectors); - for (int i = 0; i < sourceColors.Length; i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref TPixel sp = ref Unsafe.Add(ref sourceRef, i); - ref Vector4 dp = ref Unsafe.Add(ref destRef, i); - dp = sp.ToScaledVector4(); + destRef = sourceStart.ToScaledVector4(); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } } } -} \ No 
newline at end of file +}