
More codegen improvements to shared methods

js/color-alpha-handling
Sergio Pedri 5 years ago
parent
commit
aab2837d28
  1. src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs (24 changed lines)
  2. src/ImageSharp/Common/Helpers/Numerics.cs (171 changed lines)
  3. src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs (50 changed lines)
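The change applied across all three files is the same: index-based for loops over spans are rewritten as ref walks, where a current ref and an end ref bracket the buffer and Unsafe.IsAddressLessThan drives the loop, so the per-iteration index-to-offset arithmetic disappears from the generated code. A minimal sketch of the before/after shape, using an illustrative Scale example rather than code from this commit:

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

internal static class LoopPatternSketch
{
    // Index-based form: the element address is recomputed from baseRef + i each iteration.
    public static void ScaleIndexed(Span<float> values, float factor)
    {
        ref float baseRef = ref MemoryMarshal.GetReference(values);
        for (int i = 0; i < values.Length; i++)
        {
            ref float v = ref Unsafe.Add(ref baseRef, i);
            v *= factor;
        }
    }

    // Ref-walk form: two refs bracket the span and the current ref is bumped by one
    // element per iteration, giving the JIT a simple pointer-increment loop.
    public static void ScaleRefWalk(Span<float> values, float factor)
    {
        ref float current = ref MemoryMarshal.GetReference(values);
        ref float end = ref Unsafe.Add(ref current, values.Length);
        while (Unsafe.IsAddressLessThan(ref current, ref end))
        {
            current *= factor;
            current = ref Unsafe.Add(ref current, 1);
        }
    }
}

Both forms visit the same elements; the second is the shape every loop below is converted to.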

src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs (24 changed lines)

@@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
@@ -25,12 +25,14 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
[MethodImpl(InliningOptions.ShortMethod)]
public static void Expand(Span<Vector4> vectors)
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
for (int i = 0; i < vectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
Expand(ref v);
Expand(ref vectorsStart);
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
}
}
@@ -41,12 +43,14 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
[MethodImpl(InliningOptions.ShortMethod)]
public static void Compress(Span<Vector4> vectors)
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
for (int i = 0; i < vectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
Compress(ref v);
Compress(ref vectorsStart);
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
}
}
@@ -90,4 +94,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
[MethodImpl(InliningOptions.ShortMethod)]
public static float Compress(float channel) => channel <= 0.0031308F ? 12.92F * channel : (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F;
}
}
}
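The span overloads above change only how the buffer is walked; every element still goes through the scalar transfer functions. For reference, a standalone sketch of the standard sRGB pair (the Expand constants 0.04045 and 2.4 are the usual published sRGB values, assumed here rather than read from this diff; Compress mirrors the expression shown above):

using System;

internal static class SRgbReferenceSketch
{
    // Decode: gamma-encoded sRGB channel -> linear light.
    public static float Expand(float channel)
        => channel <= 0.04045F
            ? channel / 12.92F
            : MathF.Pow((channel + 0.055F) / 1.055F, 2.4F);

    // Encode: linear light -> gamma-encoded sRGB channel (1 / 2.4 is the 0.416666... exponent above).
    public static float Compress(float channel)
        => channel <= 0.0031308F
            ? 12.92F * channel
            : (1.055F * MathF.Pow(channel, 1F / 2.4F)) - 0.055F;
}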

src/ImageSharp/Common/Helpers/Numerics.cs (171 changed lines)

@@ -41,13 +41,11 @@ namespace SixLabors.ImageSharp
/// <summary>
/// Determine the Least Common Multiple (LCM) of two numbers.
/// See https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int LeastCommonMultiple(int a, int b)
{
// https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor
return (a / GreatestCommonDivisor(a, b)) * b;
}
=> a / GreatestCommonDivisor(a, b) * b;
/// <summary>
/// Calculates <paramref name="x"/> % 2
@@ -290,10 +288,14 @@ namespace SixLabors.ImageSharp
if (remainder.Length > 0)
{
for (int i = 0; i < remainder.Length; i++)
ref byte remainderStart = ref MemoryMarshal.GetReference(remainder);
ref byte remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
{
ref byte v = ref remainder[i];
v = Clamp(v, min, max);
remainderStart = Clamp(remainderStart, min, max);
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
}
}
}
@@ -311,10 +313,14 @@ namespace SixLabors.ImageSharp
if (remainder.Length > 0)
{
for (int i = 0; i < remainder.Length; i++)
ref uint remainderStart = ref MemoryMarshal.GetReference(remainder);
ref uint remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
{
ref uint v = ref remainder[i];
v = Clamp(v, min, max);
remainderStart = Clamp(remainderStart, min, max);
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
}
}
}
@@ -332,10 +338,14 @@ namespace SixLabors.ImageSharp
if (remainder.Length > 0)
{
for (int i = 0; i < remainder.Length; i++)
ref int remainderStart = ref MemoryMarshal.GetReference(remainder);
ref int remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
{
ref int v = ref remainder[i];
v = Clamp(v, min, max);
remainderStart = Clamp(remainderStart, min, max);
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
}
}
}
@@ -353,10 +363,14 @@ namespace SixLabors.ImageSharp
if (remainder.Length > 0)
{
for (int i = 0; i < remainder.Length; i++)
ref float remainderStart = ref MemoryMarshal.GetReference(remainder);
ref float remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
{
ref float v = ref remainder[i];
v = Clamp(v, min, max);
remainderStart = Clamp(remainderStart, min, max);
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
}
}
}
@@ -374,10 +388,14 @@ namespace SixLabors.ImageSharp
if (remainder.Length > 0)
{
for (int i = 0; i < remainder.Length; i++)
ref double remainderStart = ref MemoryMarshal.GetReference(remainder);
ref double remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
{
ref double v = ref remainder[i];
v = Clamp(v, min, max);
remainderStart = Clamp(remainderStart, min, max);
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
}
}
}
@@ -472,10 +490,8 @@ namespace SixLabors.ImageSharp
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && vectors.Length >= 2)
{
ref Vector256<float> vectorsBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
@@ -495,12 +511,14 @@ namespace SixLabors.ImageSharp
else
#endif
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
for (int i = 0; i < vectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
Premultiply(ref v);
Premultiply(ref vectorsStart);
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
}
}
}
@@ -515,10 +533,8 @@ namespace SixLabors.ImageSharp
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && vectors.Length >= 2)
{
ref Vector256<float> vectorsBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
@@ -538,12 +554,14 @@ namespace SixLabors.ImageSharp
else
#endif
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
for (int i = 0; i < vectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
UnPremultiply(ref v);
UnPremultiply(ref vectorsStart);
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
}
}
}
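The divide-by-2 comment in the AVX2 branches above follows from the layout: a Vector256<float> holds eight floats, i.e. two Vector4 pixels, so the vectorized loop walks vectors.Length / 2 wide elements. A sketch of that reinterpretation and of scalar alpha premultiplication; the Premultiply body here is an assumption about what the helper does (scale RGB by alpha, keep alpha), not code copied from the file:

using System;
using System.Numerics;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

internal static class PremultiplySketch
{
    // Scalar premultiplication: RGB is scaled by alpha, alpha itself is preserved.
    public static void Premultiply(ref Vector4 source)
    {
        float w = source.W;
        source *= w;
        source.W = w;
    }

    // Reinterpret the pixel buffer as 256-bit lanes: 8 floats per lane = 2 pixels,
    // which is why the AVX2 loop bound is vectors.Length / 2.
    public static Span<Vector256<float>> AsWide(Span<Vector4> vectors)
        => MemoryMarshal.Cast<Vector4, Vector256<float>>(vectors);
}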
@@ -633,53 +651,54 @@ namespace SixLabors.ImageSharp
vectors128Ref = y4;
vectors128Ref = ref Unsafe.Add(ref vectors128Ref, 1);
}
return;
}
else
#endif
ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length);

// Fallback with scalar preprocessing and vectorized approximation steps
while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd))
{
Vector4 v = vectorsRef;
double
x64 = v.X,
y64 = v.Y,
z64 = v.Z;
float a = v.W;

ulong
xl = *(ulong*)&x64,
yl = *(ulong*)&y64,
zl = *(ulong*)&z64;

// Here we use a trick to compute the starting value x0 for the cube root. This is because doing
// pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamma is 3 in this case,
// this means what we actually want is to find the cube root of our clamped values.
// For more info on the constant below, see:
// https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543.
// Here we perform the same trick on all RGB channels separately to help the CPU execute them in parallel, and
// store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit
// register, and use it to accelerate two steps of the Newton approximation using SIMD.
xl = 0x2a9f8a7be393b600 + (xl / 3);
yl = 0x2a9f8a7be393b600 + (yl / 3);
zl = 0x2a9f8a7be393b600 + (zl / 3);

Vector4 y4;
y4.X = (float)*(double*)&xl;
y4.Y = (float)*(double*)&yl;
y4.Z = (float)*(double*)&zl;
y4.W = 0;

y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
y4.W = a;

vectorsRef = y4;
vectorsRef = ref Unsafe.Add(ref vectorsRef, 1);
}
}
}
}
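The comment block in the fallback describes the approximation: reinterpret the clamped channel as a double, divide its raw bits by 3, add a magic constant to get a cube-root seed, then refine with two Newton-Raphson steps. A standalone scalar sketch of the same idea, illustrative only and valid for the non-negative inputs used here:

using System;

internal static class CubeRootSketch
{
    public static float CubeRoot(float value)
    {
        // Bit trick: dividing the IEEE-754 bit pattern by 3 roughly divides the exponent
        // by 3; the constant re-biases it. See the Intel forum link cited above.
        double x64 = value;
        ulong bits = (ulong)BitConverter.DoubleToInt64Bits(x64);
        bits = 0x2a9f8a7be393b600 + (bits / 3);
        float y = (float)BitConverter.Int64BitsToDouble((long)bits);

        // Two Newton-Raphson steps for y^3 = value: y <- (2/3)y + value / (3y^2).
        y = (2F / 3F * y) + (1F / 3F * (value / (y * y)));
        y = (2F / 3F * y) + (1F / 3F * (value / (y * y)));
        return y;
    }
}

For an input of 0.5F the result should land within a small fraction of a percent of MathF.Cbrt(0.5F) ≈ 0.7937F, which is the accuracy the two refinement steps are there to buy.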

src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs (50 changed lines)

@@ -88,14 +88,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
Span<TPixel> destPixels)
where TPixel : unmanaged, IPixel<TPixel>
{
ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors);
ref Vector4 sourceStart = ref MemoryMarshal.GetReference(sourceVectors);
ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceStart, sourceVectors.Length);
ref TPixel destRef = ref MemoryMarshal.GetReference(destPixels);
for (int i = 0; i < sourceVectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd))
{
ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i);
ref TPixel dp = ref Unsafe.Add(ref destRef, i);
dp.FromVector4(sp);
destRef.FromVector4(sourceStart);
sourceStart = ref Unsafe.Add(ref sourceStart, 1);
destRef = ref Unsafe.Add(ref destRef, 1);
}
}
@@ -105,14 +107,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
Span<Vector4> destVectors)
where TPixel : unmanaged, IPixel<TPixel>
{
ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref TPixel sourceStart = ref MemoryMarshal.GetReference(sourcePixels);
ref TPixel sourceEnd = ref Unsafe.Add(ref sourceStart, sourcePixels.Length);
ref Vector4 destRef = ref MemoryMarshal.GetReference(destVectors);
for (int i = 0; i < sourcePixels.Length; i++)
while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd))
{
ref TPixel sp = ref Unsafe.Add(ref sourceRef, i);
ref Vector4 dp = ref Unsafe.Add(ref destRef, i);
dp = sp.ToVector4();
destRef = sourceStart.ToVector4();
sourceStart = ref Unsafe.Add(ref sourceStart, 1);
destRef = ref Unsafe.Add(ref destRef, 1);
}
}
@@ -122,14 +126,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
Span<TPixel> destinationColors)
where TPixel : unmanaged, IPixel<TPixel>
{
ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors);
ref Vector4 sourceStart = ref MemoryMarshal.GetReference(sourceVectors);
ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceStart, sourceVectors.Length);
ref TPixel destRef = ref MemoryMarshal.GetReference(destinationColors);
for (int i = 0; i < sourceVectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd))
{
ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i);
ref TPixel dp = ref Unsafe.Add(ref destRef, i);
dp.FromScaledVector4(sp);
destRef.FromScaledVector4(sourceStart);
sourceStart = ref Unsafe.Add(ref sourceStart, 1);
destRef = ref Unsafe.Add(ref destRef, 1);
}
}
@@ -139,16 +145,18 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
Span<Vector4> destinationVectors)
where TPixel : unmanaged, IPixel<TPixel>
{
ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourceColors);
ref TPixel sourceStart = ref MemoryMarshal.GetReference(sourceColors);
ref TPixel sourceEnd = ref Unsafe.Add(ref sourceStart, sourceColors.Length);
ref Vector4 destRef = ref MemoryMarshal.GetReference(destinationVectors);
for (int i = 0; i < sourceColors.Length; i++)
while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd))
{
ref TPixel sp = ref Unsafe.Add(ref sourceRef, i);
ref Vector4 dp = ref Unsafe.Add(ref destRef, i);
dp = sp.ToScaledVector4();
destRef = sourceStart.ToScaledVector4();
sourceStart = ref Unsafe.Add(ref sourceStart, 1);
destRef = ref Unsafe.Add(ref destRef, 1);
}
}
}
}
}
}
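All four converter loops above share one shape: a source ref and a destination ref advance in lockstep instead of both being re-derived from a shared index on every iteration. A generic sketch of that shape; Convert and its Func parameter are illustrative placeholders, not part of the library, and the caller is assumed to have validated that destination is at least as long as source:

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

internal static class LockstepWalkSketch
{
    public static void Convert<TSource, TDest>(
        ReadOnlySpan<TSource> source,
        Span<TDest> destination,
        Func<TSource, TDest> transform)
    {
        ref TSource sourceRef = ref MemoryMarshal.GetReference(source);
        ref TSource sourceEnd = ref Unsafe.Add(ref sourceRef, source.Length);
        ref TDest destRef = ref MemoryMarshal.GetReference(destination);

        while (Unsafe.IsAddressLessThan(ref sourceRef, ref sourceEnd))
        {
            // Write through the destination ref, then bump both refs by one element.
            destRef = transform(sourceRef);
            sourceRef = ref Unsafe.Add(ref sourceRef, 1);
            destRef = ref Unsafe.Add(ref destRef, 1);
        }
    }
}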
