Browse Source

Merge pull request #1477 from SixLabors/sp/2pass-convolution-speedup

1D convolution optimization and general codegen tweaks
js/color-alpha-handling
James Jackson-South 5 years ago
committed by GitHub
parent
commit
b4e7d80ec4
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 24
      src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs
  2. 198
      src/ImageSharp/Common/Helpers/Numerics.cs
  3. 50
      src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs
  4. 29
      src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs
  5. 21
      src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs
  6. 23
      src/ImageSharp/Processing/Processors/Convolution/BoxBlurProcessor{TPixel}.cs
  7. 409
      src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs
  8. 28
      src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessorHelpers.cs
  9. 163
      src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs
  10. 14
      src/ImageSharp/Processing/Processors/Convolution/GaussianBlurProcessor{TPixel}.cs
  11. 14
      src/ImageSharp/Processing/Processors/Convolution/GaussianSharpenProcessor{TPixel}.cs

24
src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
@ -25,12 +25,14 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
[MethodImpl(InliningOptions.ShortMethod)]
public static void Expand(Span<Vector4> vectors)
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
for (int i = 0; i < vectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
Expand(ref v);
Expand(ref vectorsStart);
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
}
}
@ -41,12 +43,14 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
[MethodImpl(InliningOptions.ShortMethod)]
public static void Compress(Span<Vector4> vectors)
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
for (int i = 0; i < vectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
Compress(ref v);
Compress(ref vectorsStart);
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
}
}
@ -90,4 +94,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
[MethodImpl(InliningOptions.ShortMethod)]
public static float Compress(float channel)
{
    // Values at or below the threshold lie on the linear segment of the sRGB curve.
    if (channel <= 0.0031308F)
    {
        return 12.92F * channel;
    }

    // Everything else (including NaN, which fails the comparison above) uses the power-law segment.
    float powered = MathF.Pow(channel, 0.416666666666667F);
    return (1.055F * powered) - 0.055F;
}
}
}
}

198
src/ImageSharp/Common/Helpers/Numerics.cs

@ -41,13 +41,11 @@ namespace SixLabors.ImageSharp
/// <summary>
/// Determine the Least Common Multiple (LCM) of two numbers.
/// See https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int LeastCommonMultiple(int a, int b)
{
// https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor
return (a / GreatestCommonDivisor(a, b)) * b;
}
=> a / GreatestCommonDivisor(a, b) * b;
/// <summary>
/// Calculates <paramref name="x"/> % 2
@ -290,10 +288,14 @@ namespace SixLabors.ImageSharp
if (remainder.Length > 0)
{
for (int i = 0; i < remainder.Length; i++)
ref byte remainderStart = ref MemoryMarshal.GetReference(remainder);
ref byte remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
{
ref byte v = ref remainder[i];
v = Clamp(v, min, max);
remainderStart = Clamp(remainderStart, min, max);
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
}
}
}
@ -311,10 +313,14 @@ namespace SixLabors.ImageSharp
if (remainder.Length > 0)
{
for (int i = 0; i < remainder.Length; i++)
ref uint remainderStart = ref MemoryMarshal.GetReference(remainder);
ref uint remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
{
ref uint v = ref remainder[i];
v = Clamp(v, min, max);
remainderStart = Clamp(remainderStart, min, max);
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
}
}
}
@ -332,10 +338,14 @@ namespace SixLabors.ImageSharp
if (remainder.Length > 0)
{
for (int i = 0; i < remainder.Length; i++)
ref int remainderStart = ref MemoryMarshal.GetReference(remainder);
ref int remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
{
ref int v = ref remainder[i];
v = Clamp(v, min, max);
remainderStart = Clamp(remainderStart, min, max);
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
}
}
}
@ -353,10 +363,14 @@ namespace SixLabors.ImageSharp
if (remainder.Length > 0)
{
for (int i = 0; i < remainder.Length; i++)
ref float remainderStart = ref MemoryMarshal.GetReference(remainder);
ref float remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
{
ref float v = ref remainder[i];
v = Clamp(v, min, max);
remainderStart = Clamp(remainderStart, min, max);
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
}
}
}
@ -374,10 +388,14 @@ namespace SixLabors.ImageSharp
if (remainder.Length > 0)
{
for (int i = 0; i < remainder.Length; i++)
ref double remainderStart = ref MemoryMarshal.GetReference(remainder);
ref double remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
{
ref double v = ref remainder[i];
v = Clamp(v, min, max);
remainderStart = Clamp(remainderStart, min, max);
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
}
}
}
@ -407,7 +425,6 @@ namespace SixLabors.ImageSharp
where T : unmanaged
{
ref T sRef = ref MemoryMarshal.GetReference(span);
ref Vector<T> vsBase = ref Unsafe.As<T, Vector<T>>(ref MemoryMarshal.GetReference(span));
var vmin = new Vector<T>(min);
var vmax = new Vector<T>(max);
@ -415,25 +432,35 @@ namespace SixLabors.ImageSharp
int m = Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{
ref Vector<T> vs0 = ref Unsafe.Add(ref vsBase, i);
ref Vector<T> vs1 = ref Unsafe.Add(ref vs0, 1);
ref Vector<T> vs2 = ref Unsafe.Add(ref vs0, 2);
ref Vector<T> vs3 = ref Unsafe.Add(ref vs0, 3);
ref Vector<T> vs0 = ref Unsafe.As<T, Vector<T>>(ref MemoryMarshal.GetReference(span));
ref Vector<T> vs1 = ref Unsafe.Add(ref vs0, 1);
ref Vector<T> vs2 = ref Unsafe.Add(ref vs0, 2);
ref Vector<T> vs3 = ref Unsafe.Add(ref vs0, 3);
ref Vector<T> vsEnd = ref Unsafe.Add(ref vs0, u);
while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd))
{
vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax);
vs1 = Vector.Min(Vector.Max(vmin, vs1), vmax);
vs2 = Vector.Min(Vector.Max(vmin, vs2), vmax);
vs3 = Vector.Min(Vector.Max(vmin, vs3), vmax);
vs0 = ref Unsafe.Add(ref vs0, 4);
vs1 = ref Unsafe.Add(ref vs1, 4);
vs2 = ref Unsafe.Add(ref vs2, 4);
vs3 = ref Unsafe.Add(ref vs3, 4);
}
if (m > 0)
{
for (int i = u; i < n; i++)
vs0 = ref vsEnd;
vsEnd = ref Unsafe.Add(ref vsEnd, m);
while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd))
{
ref Vector<T> vs0 = ref Unsafe.Add(ref vsBase, i);
vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax);
vs0 = ref Unsafe.Add(ref vs0, 1);
}
}
}
@ -472,10 +499,8 @@ namespace SixLabors.ImageSharp
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && vectors.Length >= 2)
{
ref Vector256<float> vectorsBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
@ -495,12 +520,14 @@ namespace SixLabors.ImageSharp
else
#endif
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
for (int i = 0; i < vectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
Premultiply(ref v);
Premultiply(ref vectorsStart);
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
}
}
}
@ -515,10 +542,8 @@ namespace SixLabors.ImageSharp
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && vectors.Length >= 2)
{
ref Vector256<float> vectorsBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
@ -538,12 +563,14 @@ namespace SixLabors.ImageSharp
else
#endif
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
for (int i = 0; i < vectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
UnPremultiply(ref v);
UnPremultiply(ref vectorsStart);
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
}
}
}
@ -633,53 +660,54 @@ namespace SixLabors.ImageSharp
vectors128Ref = y4;
vectors128Ref = ref Unsafe.Add(ref vectors128Ref, 1);
}
return;
}
else
#endif
ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length);
// Fallback with scalar preprocessing and vectorized approximation steps
while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd))
{
Vector4 v = vectorsRef;
ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors);
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length);
double
x64 = v.X,
y64 = v.Y,
z64 = v.Z;
float a = v.W;
ulong
xl = *(ulong*)&x64,
yl = *(ulong*)&y64,
zl = *(ulong*)&z64;
// Here we use a trick to compute the starting value x0 for the cube root. This is because doing
// pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamma is 3 in this case,
// this means what we actually want is to find the cube root of our clamped values.
// For more info on the constant below, see:
// https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543.
// Here we perform the same trick on all RGB channels separately to help the CPU execute them in parallel, and
// store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit
// register, and use it to accelerate two steps of the Newton approximation using SIMD.
xl = 0x2a9f8a7be393b600 + (xl / 3);
yl = 0x2a9f8a7be393b600 + (yl / 3);
zl = 0x2a9f8a7be393b600 + (zl / 3);
Vector4 y4;
y4.X = (float)*(double*)&xl;
y4.Y = (float)*(double*)&yl;
y4.Z = (float)*(double*)&zl;
y4.W = 0;
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
y4.W = a;
vectorsRef = y4;
vectorsRef = ref Unsafe.Add(ref vectorsRef, 1);
// Fallback with scalar preprocessing and vectorized approximation steps
while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd))
{
Vector4 v = vectorsRef;
double
x64 = v.X,
y64 = v.Y,
z64 = v.Z;
float a = v.W;
ulong
xl = *(ulong*)&x64,
yl = *(ulong*)&y64,
zl = *(ulong*)&z64;
// Here we use a trick to compute the starting value x0 for the cube root. This is because doing
// pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamma is 3 in this case,
// this means what we actually want is to find the cube root of our clamped values.
// For more info on the constant below, see:
// https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543.
// Here we perform the same trick on all RGB channels separately to help the CPU execute them in parallel, and
// store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit
// register, and use it to accelerate two steps of the Newton approximation using SIMD.
xl = 0x2a9f8a7be393b600 + (xl / 3);
yl = 0x2a9f8a7be393b600 + (yl / 3);
zl = 0x2a9f8a7be393b600 + (zl / 3);
Vector4 y4;
y4.X = (float)*(double*)&xl;
y4.Y = (float)*(double*)&yl;
y4.Z = (float)*(double*)&zl;
y4.W = 0;
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
y4.W = a;
vectorsRef = y4;
vectorsRef = ref Unsafe.Add(ref vectorsRef, 1);
}
}
}
}

50
src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs

@ -88,14 +88,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
Span<TPixel> destPixels)
where TPixel : unmanaged, IPixel<TPixel>
{
ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors);
ref Vector4 sourceStart = ref MemoryMarshal.GetReference(sourceVectors);
ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceStart, sourceVectors.Length);
ref TPixel destRef = ref MemoryMarshal.GetReference(destPixels);
for (int i = 0; i < sourceVectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd))
{
ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i);
ref TPixel dp = ref Unsafe.Add(ref destRef, i);
dp.FromVector4(sp);
destRef.FromVector4(sourceStart);
sourceStart = ref Unsafe.Add(ref sourceStart, 1);
destRef = ref Unsafe.Add(ref destRef, 1);
}
}
@ -105,14 +107,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
Span<Vector4> destVectors)
where TPixel : unmanaged, IPixel<TPixel>
{
ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref TPixel sourceStart = ref MemoryMarshal.GetReference(sourcePixels);
ref TPixel sourceEnd = ref Unsafe.Add(ref sourceStart, sourcePixels.Length);
ref Vector4 destRef = ref MemoryMarshal.GetReference(destVectors);
for (int i = 0; i < sourcePixels.Length; i++)
while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd))
{
ref TPixel sp = ref Unsafe.Add(ref sourceRef, i);
ref Vector4 dp = ref Unsafe.Add(ref destRef, i);
dp = sp.ToVector4();
destRef = sourceStart.ToVector4();
sourceStart = ref Unsafe.Add(ref sourceStart, 1);
destRef = ref Unsafe.Add(ref destRef, 1);
}
}
@ -122,14 +126,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
Span<TPixel> destinationColors)
where TPixel : unmanaged, IPixel<TPixel>
{
ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors);
ref Vector4 sourceStart = ref MemoryMarshal.GetReference(sourceVectors);
ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceStart, sourceVectors.Length);
ref TPixel destRef = ref MemoryMarshal.GetReference(destinationColors);
for (int i = 0; i < sourceVectors.Length; i++)
while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd))
{
ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i);
ref TPixel dp = ref Unsafe.Add(ref destRef, i);
dp.FromScaledVector4(sp);
destRef.FromScaledVector4(sourceStart);
sourceStart = ref Unsafe.Add(ref sourceStart, 1);
destRef = ref Unsafe.Add(ref destRef, 1);
}
}
@ -139,16 +145,18 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
Span<Vector4> destinationVectors)
where TPixel : unmanaged, IPixel<TPixel>
{
ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourceColors);
ref TPixel sourceStart = ref MemoryMarshal.GetReference(sourceColors);
ref TPixel sourceEnd = ref Unsafe.Add(ref sourceStart, sourceColors.Length);
ref Vector4 destRef = ref MemoryMarshal.GetReference(destinationVectors);
for (int i = 0; i < sourceColors.Length; i++)
while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd))
{
ref TPixel sp = ref Unsafe.Add(ref sourceRef, i);
ref Vector4 dp = ref Unsafe.Add(ref destRef, i);
dp = sp.ToScaledVector4();
destRef = sourceStart.ToScaledVector4();
sourceStart = ref Unsafe.Add(ref sourceStart, 1);
destRef = ref Unsafe.Add(ref destRef, 1);
}
}
}
}
}
}

29
src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs

@ -129,29 +129,34 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
int boundsWidth = this.bounds.Width;
int kernelSize = this.kernel.Length;
Span<int> rowOffsets = this.map.GetRowOffsetSpan();
ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(rowOffsets), (y - this.bounds.Y) * kernelSize);
ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(this.map.GetRowOffsetSpan()), (y - this.bounds.Y) * kernelSize);
// The target buffer is zeroed initially and then it accumulates the results
// of each partial convolution, so we don't have to clear it here as well
ref Vector4 targetBase = ref this.targetValues.GetElementUnsafe(boundsX, y);
ref Complex64 kernelBase = ref this.kernel[0];
ref Complex64 kernelStart = ref this.kernel[0];
ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize);
for (int kY = 0; kY < kernelSize; kY++)
while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd))
{
// Get the precalculated source sample row for this kernel row and copy to our buffer
int sampleY = Unsafe.Add(ref sampleRowBase, kY);
ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleY);
Complex64 factor = Unsafe.Add(ref kernelBase, kY);
ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleRowBase);
ref ComplexVector4 sourceEnd = ref Unsafe.Add(ref sourceBase, boundsWidth);
ref Vector4 targetStart = ref targetBase;
Complex64 factor = kernelStart;
for (int x = 0; x < boundsWidth; x++)
while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd))
{
ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
ComplexVector4 sample = Unsafe.Add(ref sourceBase, x);
ComplexVector4 partial = factor * sample;
ComplexVector4 partial = factor * sourceBase;
target += partial.WeightedSum(this.z, this.w);
targetStart += partial.WeightedSum(this.z, this.w);
sourceBase = ref Unsafe.Add(ref sourceBase, 1);
targetStart = ref Unsafe.Add(ref targetStart, 1);
}
kernelStart = ref Unsafe.Add(ref kernelStart, 1);
sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1);
}
}
}

21
src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs

@ -233,32 +233,37 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
// Clear the target buffer for each row run
Span<ComplexVector4> targetBuffer = this.targetValues.GetRowSpan(y);
targetBuffer.Clear();
ref ComplexVector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);
// Execute the bulk pixel format conversion for the current row
Span<TPixel> sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, span);
ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span);
ref ComplexVector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer);
ref ComplexVector4 targetEnd = ref Unsafe.Add(ref targetStart, span.Length);
ref Complex64 kernelBase = ref this.kernel[0];
ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize);
ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan());
for (int x = 0; x < span.Length; x++)
while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd))
{
ref ComplexVector4 target = ref Unsafe.Add(ref targetBase, x);
ref Complex64 kernelStart = ref kernelBase;
ref int sampleColumnStart = ref sampleColumnBase;
for (int kX = 0; kX < kernelSize; kX++)
while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd))
{
int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
Complex64 factor = Unsafe.Add(ref kernelBase, kX);
Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX);
target.Sum(factor * sample);
targetStart.Sum(kernelStart * sample);
kernelStart = ref Unsafe.Add(ref kernelStart, 1);
sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1);
}
// Shift the base column sampling reference by one row at the end of each outer
// iteration so that the inner tight loop indexing can skip the multiplication
sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize);
targetStart = ref Unsafe.Add(ref targetStart, 1);
}
}
}

23
src/ImageSharp/Processing/Processors/Convolution/BoxBlurProcessor{TPixel}.cs

@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Processing.Processors.Convolution
@ -23,24 +24,18 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
: base(configuration, source, sourceRectangle)
{
int kernelSize = (definition.Radius * 2) + 1;
this.KernelX = CreateBoxKernel(kernelSize);
this.KernelY = this.KernelX.Transpose();
this.Kernel = CreateBoxKernel(kernelSize);
}
/// <summary>
/// Gets the horizontal gradient operator.
/// Gets the 1D convolution kernel.
/// </summary>
public DenseMatrix<float> KernelX { get; }
/// <summary>
/// Gets the vertical gradient operator.
/// </summary>
public DenseMatrix<float> KernelY { get; }
public float[] Kernel { get; }
/// <inheritdoc/>
protected override void OnFrameApply(ImageFrame<TPixel> source)
{
using var processor = new Convolution2PassProcessor<TPixel>(this.Configuration, this.KernelX, this.KernelY, false, this.Source, this.SourceRectangle);
using var processor = new Convolution2PassProcessor<TPixel>(this.Configuration, this.Kernel, false, this.Source, this.SourceRectangle);
processor.Apply(source);
}
@ -50,10 +45,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
/// </summary>
/// <param name="kernelSize">The maximum size of the kernel in either direction.</param>
/// <returns>The box kernel, with every element set to <c>1 / kernelSize</c>.</returns>
private static DenseMatrix<float> CreateBoxKernel(int kernelSize)
private static float[] CreateBoxKernel(int kernelSize)
{
var kernel = new DenseMatrix<float>(kernelSize, 1);
kernel.Fill(1F / kernelSize);
var kernel = new float[kernelSize];
kernel.AsSpan().Fill(1F / kernelSize);
return kernel;
}
}

409
src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs

@ -1,7 +1,10 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
@ -19,34 +22,26 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
/// Initializes a new instance of the <see cref="Convolution2PassProcessor{TPixel}"/> class.
/// </summary>
/// <param name="configuration">The configuration which allows altering default behaviour or extending the library.</param>
/// <param name="kernelX">The horizontal gradient operator.</param>
/// <param name="kernelY">The vertical gradient operator.</param>
/// <param name="kernel">The 1D convolution kernel.</param>
/// <param name="preserveAlpha">Whether the original alpha values are preserved, with the convolution filter applied to the color channels only.</param>
/// <param name="source">The source <see cref="Image{TPixel}"/> for the current processor instance.</param>
/// <param name="sourceRectangle">The source area to process for the current processor instance.</param>
public Convolution2PassProcessor(
Configuration configuration,
in DenseMatrix<float> kernelX,
in DenseMatrix<float> kernelY,
float[] kernel,
bool preserveAlpha,
Image<TPixel> source,
Rectangle sourceRectangle)
: base(configuration, source, sourceRectangle)
{
this.KernelX = kernelX;
this.KernelY = kernelY;
this.Kernel = kernel;
this.PreserveAlpha = preserveAlpha;
}
/// <summary>
/// Gets the horizontal convolution kernel.
/// Gets the convolution kernel.
/// </summary>
public DenseMatrix<float> KernelX { get; }
/// <summary>
/// Gets the vertical convolution kernel.
/// </summary>
public DenseMatrix<float> KernelY { get; }
public float[] Kernel { get; }
/// <summary>
/// Gets a value indicating whether the original alpha values are preserved, with the convolution filter applied to the color channels only.
@ -64,44 +59,364 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
// for source and target bulk pixel conversion.
var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height);
using (var mapX = new KernelSamplingMap(this.Configuration.MemoryAllocator))
// We can create a single sampling map with the size as if we were using the non separated 2D kernel
// the two 1D kernels represent, and reuse it across both convolution steps, like in the bokeh blur.
using var mapXY = new KernelSamplingMap(this.Configuration.MemoryAllocator);
mapXY.BuildSamplingOffsetMap(this.Kernel.Length, this.Kernel.Length, interest);
// Horizontal convolution
var horizontalOperation = new HorizontalConvolutionRowOperation(
interest,
firstPassPixels,
source.PixelBuffer,
mapXY,
this.Kernel,
this.Configuration,
this.PreserveAlpha);
ParallelRowIterator.IterateRows<HorizontalConvolutionRowOperation, Vector4>(
this.Configuration,
operationBounds,
in horizontalOperation);
// Vertical convolution
var verticalOperation = new VerticalConvolutionRowOperation(
interest,
source.PixelBuffer,
firstPassPixels,
mapXY,
this.Kernel,
this.Configuration,
this.PreserveAlpha);
ParallelRowIterator.IterateRows<VerticalConvolutionRowOperation, Vector4>(
this.Configuration,
operationBounds,
in verticalOperation);
}
/// <summary>
/// A <see langword="struct"/> implementing the logic for the horizontal 1D convolution.
/// </summary>
internal readonly struct HorizontalConvolutionRowOperation : IRowOperation<Vector4>
{
private readonly Rectangle bounds;
private readonly Buffer2D<TPixel> targetPixels;
private readonly Buffer2D<TPixel> sourcePixels;
private readonly KernelSamplingMap map;
private readonly float[] kernel;
private readonly Configuration configuration;
private readonly bool preserveAlpha;
/// <summary>
/// Initializes a new instance of the <see cref="HorizontalConvolutionRowOperation"/> struct.
/// </summary>
/// <param name="bounds">The rectangle whose X and Width select the slice of each row that is processed.</param>
/// <param name="targetPixels">The pixel buffer the convolved rows are written to.</param>
/// <param name="sourcePixels">The pixel buffer the rows to convolve are read from.</param>
/// <param name="map">The sampling map supplying the column offsets used while convolving.</param>
/// <param name="kernel">The 1D convolution kernel weights.</param>
/// <param name="configuration">The configuration forwarded to the bulk pixel conversions.</param>
/// <param name="preserveAlpha">Selects the Convolve3 path when <see langword="true"/>, otherwise the Convolve4 path.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public HorizontalConvolutionRowOperation(
    Rectangle bounds,
    Buffer2D<TPixel> targetPixels,
    Buffer2D<TPixel> sourcePixels,
    KernelSamplingMap map,
    float[] kernel,
    Configuration configuration,
    bool preserveAlpha)
{
    // Behavioural settings.
    this.configuration = configuration;
    this.preserveAlpha = preserveAlpha;

    // Convolution inputs.
    this.kernel = kernel;
    this.map = map;
    this.bounds = bounds;

    // Pixel storage: note that the target comes before the source in the parameter list.
    this.sourcePixels = sourcePixels;
    this.targetPixels = targetPixels;
}
/// <inheritdoc/>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Invoke(int y, Span<Vector4> span)
{
mapX.BuildSamplingOffsetMap(this.KernelX, interest);
// Horizontal convolution
var horizontalOperation = new ConvolutionRowOperation<TPixel>(
interest,
firstPassPixels,
source.PixelBuffer,
mapX,
this.KernelX,
this.Configuration,
this.PreserveAlpha);
ParallelRowIterator.IterateRows<ConvolutionRowOperation<TPixel>, Vector4>(
this.Configuration,
operationBounds,
in horizontalOperation);
if (this.preserveAlpha)
{
this.Convolve3(y, span);
}
else
{
this.Convolve4(y, span);
}
}
using (var mapY = new KernelSamplingMap(this.Configuration.MemoryAllocator))
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void Convolve3(int y, Span<Vector4> span)
{
mapY.BuildSamplingOffsetMap(this.KernelY, interest);
// Vertical convolution
var verticalOperation = new ConvolutionRowOperation<TPixel>(
interest,
source.PixelBuffer,
firstPassPixels,
mapY,
this.KernelY,
this.Configuration,
this.PreserveAlpha);
ParallelRowIterator.IterateRows<ConvolutionRowOperation<TPixel>, Vector4>(
this.Configuration,
operationBounds,
in verticalOperation);
// Span is 2x bounds.
int boundsX = this.bounds.X;
int boundsWidth = this.bounds.Width;
int kernelSize = this.kernel.Length;
Span<Vector4> sourceBuffer = span.Slice(0, this.bounds.Width);
Span<Vector4> targetBuffer = span.Slice(this.bounds.Width);
// Clear the target buffer for each row run.
targetBuffer.Clear();
// Convert the source row for the current y, restricted to the bounds, to vectors and store it in our buffer.
Span<TPixel> sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
ref Vector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer);
ref Vector4 targetEnd = ref Unsafe.Add(ref targetStart, sourceBuffer.Length);
ref float kernelBase = ref this.kernel[0];
ref float kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize);
ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan());
while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd))
{
ref float kernelStart = ref kernelBase;
ref int sampleColumnStart = ref sampleColumnBase;
while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd))
{
Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX);
targetStart += kernelStart * sample;
kernelStart = ref Unsafe.Add(ref kernelStart, 1);
sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1);
}
targetStart = ref Unsafe.Add(ref targetStart, 1);
sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize);
}
// Now we need to copy the original alpha values from the source row.
sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
targetStart = ref MemoryMarshal.GetReference(targetBuffer);
while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd))
{
targetStart.W = sourceBase.W;
targetStart = ref Unsafe.Add(ref targetStart, 1);
sourceBase = ref Unsafe.Add(ref sourceBase, 1);
}
Span<TPixel> targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow);
}
/// <summary>
/// Convolves row <paramref name="y"/> of the source pixels horizontally with the 1D kernel,
/// filtering all four vector components, and writes the result to the same row of the target pixels.
/// The source vectors are premultiplied before the convolution and the accumulated result is
/// un-premultiplied before being converted back to pixels.
/// </summary>
/// <param name="y">The index of the row to process.</param>
/// <param name="span">
/// The scratch buffer for this row. The first <c>bounds.Width</c> elements hold the converted source
/// vectors and the remainder is used as the accumulation target.
/// </param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void Convolve4(int y, Span<Vector4> span)
{
// Span is 2x bounds: the first half is the source buffer, the second half the target buffer.
int boundsX = this.bounds.X;
int boundsWidth = this.bounds.Width;
int kernelSize = this.kernel.Length;
Span<Vector4> sourceBuffer = span.Slice(0, this.bounds.Width);
Span<Vector4> targetBuffer = span.Slice(this.bounds.Width);
// Clear the target buffer for each row run, as the loop below accumulates into it with +=.
targetBuffer.Clear();
// Convert the source row for the current y, restricted to the bounds, to vectors in the source buffer,
// then premultiply them in place before they are weighted by the kernel.
Span<TPixel> sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
Numerics.Premultiply(sourceBuffer);
ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
ref Vector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer);
// Only sourceBuffer.Length (= bounds width) target elements are visited.
ref Vector4 targetEnd = ref Unsafe.Add(ref targetStart, sourceBuffer.Length);
// NOTE: indexing element 0 is bounds checked, so an empty kernel array throws here.
ref float kernelBase = ref this.kernel[0];
ref float kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize);
ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan());
// Outer loop: one iteration per target element, walking the target buffer by reference.
while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd))
{
// Restart the kernel walk, and read offsets from the group belonging to this target element.
ref float kernelStart = ref kernelBase;
ref int sampleColumnStart = ref sampleColumnBase;
// Inner loop: one iteration per kernel weight, consuming one column offset each time.
while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd))
{
// The stored offset has boundsX subtracted to index the source buffer, which starts at boundsX
// (presumably the map stores absolute image columns - TODO confirm against KernelSamplingMap).
Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX);
targetStart += kernelStart * sample;
kernelStart = ref Unsafe.Add(ref kernelStart, 1);
sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1);
}
targetStart = ref Unsafe.Add(ref targetStart, 1);
// Skip ahead by kernelSize offsets, i.e. to the group of offsets for the next target element.
sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize);
}
// Undo the premultiplication on the accumulated values before converting back to pixels.
Numerics.UnPremultiply(targetBuffer);
Span<TPixel> targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow);
}
}
/// <summary>
/// A <see langword="struct"/> implementing the logic for the vertical 1D convolution.
/// </summary>
internal readonly struct VerticalConvolutionRowOperation : IRowOperation<Vector4>
{
    private readonly Rectangle bounds;
    private readonly Buffer2D<TPixel> targetPixels;
    private readonly Buffer2D<TPixel> sourcePixels;
    private readonly KernelSamplingMap map;
    private readonly float[] kernel;
    private readonly Configuration configuration;
    private readonly bool preserveAlpha;

    /// <summary>
    /// Initializes a new instance of the <see cref="VerticalConvolutionRowOperation"/> struct.
    /// </summary>
    /// <param name="bounds">The rectangle whose X, Y and Width select the pixels to process.</param>
    /// <param name="targetPixels">The buffer the convolved rows are written to.</param>
    /// <param name="sourcePixels">The buffer the sample rows are read from.</param>
    /// <param name="map">The sampling map supplying the row offsets for each output row.</param>
    /// <param name="kernel">The 1D kernel weights.</param>
    /// <param name="configuration">The configuration passed to the bulk pixel conversions.</param>
    /// <param name="preserveAlpha">Whether the alpha of the source row is kept instead of being convolved.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public VerticalConvolutionRowOperation(
        Rectangle bounds,
        Buffer2D<TPixel> targetPixels,
        Buffer2D<TPixel> sourcePixels,
        KernelSamplingMap map,
        float[] kernel,
        Configuration configuration,
        bool preserveAlpha)
    {
        this.bounds = bounds;
        this.targetPixels = targetPixels;
        this.sourcePixels = sourcePixels;
        this.map = map;
        this.kernel = kernel;
        this.configuration = configuration;
        this.preserveAlpha = preserveAlpha;
    }

    /// <inheritdoc/>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Invoke(int y, Span<Vector4> span)
    {
        if (this.preserveAlpha)
        {
            this.Convolve3(y, span);
        }
        else
        {
            this.Convolve4(y, span);
        }
    }

    /// <summary>
    /// Convolves row <paramref name="y"/> vertically, then overwrites the alpha of every
    /// result with the alpha of the source pixel at the same position in row <paramref name="y"/>.
    /// </summary>
    /// <param name="y">The index of the row to process.</param>
    /// <param name="span">Scratch memory; the first <c>bounds.Width</c> items hold the source samples, the rest the result.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void Convolve3(int y, Span<Vector4> span)
    {
        // Span is 2x bounds.
        int boundsX = this.bounds.X;
        int boundsWidth = this.bounds.Width;
        int kernelSize = this.kernel.Length;

        Span<Vector4> sourceBuffer = span.Slice(0, boundsWidth);
        Span<Vector4> targetBuffer = span.Slice(boundsWidth);

        // Each output row owns a run of kernel-length entries in the row offset span.
        ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(this.map.GetRowOffsetSpan()), (y - this.bounds.Y) * kernelSize);

        // Clear the target buffer for each row run.
        targetBuffer.Clear();

        ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);

        ref float kernelStart = ref this.kernel[0];
        ref float kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize);

        Span<TPixel> sourceRow;
        while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd))
        {
            // Get the precalculated source sample row for this kernel row and copy to our buffer.
            sourceRow = this.sourcePixels.GetRowSpan(sampleRowBase).Slice(boundsX, boundsWidth);

            PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);

            ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
            ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceBase, sourceBuffer.Length);
            ref Vector4 targetStart = ref targetBase;
            float factor = kernelStart;

            // Accumulate this sample row, scaled by its kernel weight, into the target.
            while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd))
            {
                targetStart += factor * sourceBase;

                sourceBase = ref Unsafe.Add(ref sourceBase, 1);
                targetStart = ref Unsafe.Add(ref targetStart, 1);
            }

            kernelStart = ref Unsafe.Add(ref kernelStart, 1);
            sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1);
        }

        // Now we need to copy the original alpha values from the source row.
        sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
        PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);
        {
            ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
            ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceBase, sourceBuffer.Length);

            while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd))
            {
                targetBase.W = sourceBase.W;

                targetBase = ref Unsafe.Add(ref targetBase, 1);
                sourceBase = ref Unsafe.Add(ref sourceBase, 1);
            }
        }

        Span<TPixel> targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
        PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow);
    }

    /// <summary>
    /// Convolves row <paramref name="y"/> vertically across all four vector components,
    /// premultiplying the samples before accumulation and reversing that on the result.
    /// </summary>
    /// <param name="y">The index of the row to process.</param>
    /// <param name="span">Scratch memory; the first <c>bounds.Width</c> items hold the source samples, the rest the result.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void Convolve4(int y, Span<Vector4> span)
    {
        // Span is 2x bounds.
        int boundsX = this.bounds.X;
        int boundsWidth = this.bounds.Width;
        int kernelSize = this.kernel.Length;

        Span<Vector4> sourceBuffer = span.Slice(0, boundsWidth);
        Span<Vector4> targetBuffer = span.Slice(boundsWidth);

        // Each output row owns a run of kernel-length entries in the row offset span.
        ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(this.map.GetRowOffsetSpan()), (y - this.bounds.Y) * kernelSize);

        // Clear the target buffer for each row run.
        targetBuffer.Clear();

        ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);

        ref float kernelStart = ref this.kernel[0];
        ref float kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize);

        Span<TPixel> sourceRow;
        while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd))
        {
            // Get the precalculated source sample row for this kernel row and copy to our buffer.
            sourceRow = this.sourcePixels.GetRowSpan(sampleRowBase).Slice(boundsX, boundsWidth);

            PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer);

            // The accumulated result is un-premultiplied below, so every sample row must be
            // premultiplied first; otherwise the two operations are not paired.
            Numerics.Premultiply(sourceBuffer);

            ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer);
            ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceBase, sourceBuffer.Length);
            ref Vector4 targetStart = ref targetBase;
            float factor = kernelStart;

            // Accumulate this sample row, scaled by its kernel weight, into the target.
            while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd))
            {
                targetStart += factor * sourceBase;

                sourceBase = ref Unsafe.Add(ref sourceBase, 1);
                targetStart = ref Unsafe.Add(ref targetStart, 1);
            }

            kernelStart = ref Unsafe.Add(ref kernelStart, 1);
            sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1);
        }

        Numerics.UnPremultiply(targetBuffer);

        Span<TPixel> targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
        PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow);
    }
}
}

28
src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessorHelpers.cs

@ -12,17 +12,15 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
/// See <see href="http://chemaguerra.com/gaussian-filter-radius/"/>.
/// </summary>
internal static int GetDefaultGaussianRadius(float sigma)
{
return (int)MathF.Ceiling(sigma * 3);
}
=> (int)MathF.Ceiling(sigma * 3);
/// <summary>
/// Create a 1 dimensional Gaussian kernel using the Gaussian G(x) function.
/// </summary>
/// <returns>The <see cref="DenseMatrix{T}"/>.</returns>
internal static DenseMatrix<float> CreateGaussianBlurKernel(int size, float weight)
/// <returns>The convolution kernel.</returns>
internal static float[] CreateGaussianBlurKernel(int size, float weight)
{
var kernel = new DenseMatrix<float>(size, 1);
var kernel = new float[size];
float sum = 0F;
float midpoint = (size - 1) / 2F;
@ -32,13 +30,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
float x = i - midpoint;
float gx = Numerics.Gaussian(x, weight);
sum += gx;
kernel[0, i] = gx;
kernel[i] = gx;
}
// Normalize kernel so that the sum of all weights equals 1
for (int i = 0; i < size; i++)
{
kernel[0, i] /= sum;
kernel[i] /= sum;
}
return kernel;
@ -47,10 +45,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
/// <summary>
/// Create a 1 dimensional Gaussian sharpening kernel using the Gaussian G(x) function.
/// </summary>
/// <returns>The <see cref="DenseMatrix{T}"/>.</returns>
internal static DenseMatrix<float> CreateGaussianSharpenKernel(int size, float weight)
/// <returns>The convolution kernel.</returns>
internal static float[] CreateGaussianSharpenKernel(int size, float weight)
{
var kernel = new DenseMatrix<float>(size, 1);
var kernel = new float[size];
float sum = 0;
@ -60,7 +58,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
float x = i - midpoint;
float gx = Numerics.Gaussian(x, weight);
sum += gx;
kernel[0, i] = gx;
kernel[i] = gx;
}
// Invert the kernel for sharpening.
@ -70,19 +68,19 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
if (i == midpointRounded)
{
// Calculate central value
kernel[0, i] = (2F * sum) - kernel[0, i];
kernel[i] = (2F * sum) - kernel[i];
}
else
{
// invert value
kernel[0, i] = -kernel[0, i];
kernel[i] = -kernel[i];
}
}
// Normalize kernel so that the sum of all weights equals 1
for (int i = 0; i < size; i++)
{
kernel[0, i] /= sum;
kernel[i] /= sum;
}
return kernel;

163
src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs

@ -1,163 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Processing.Processors.Convolution
{
/// <summary>
/// A row operation that convolves one row of pixels with a kernel held in a <see cref="DenseMatrix{T}"/>.
/// </summary>
internal readonly struct ConvolutionRowOperation<TPixel> : IRowOperation<Vector4>
    where TPixel : unmanaged, IPixel<TPixel>
{
    private readonly Rectangle bounds;
    private readonly Buffer2D<TPixel> targetPixels;
    private readonly Buffer2D<TPixel> sourcePixels;
    private readonly KernelSamplingMap map;
    private readonly DenseMatrix<float> kernelMatrix;
    private readonly Configuration configuration;
    private readonly bool preserveAlpha;

    /// <summary>
    /// Initializes a new instance of the <see cref="ConvolutionRowOperation{TPixel}"/> struct.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ConvolutionRowOperation(
        Rectangle bounds,
        Buffer2D<TPixel> targetPixels,
        Buffer2D<TPixel> sourcePixels,
        KernelSamplingMap map,
        DenseMatrix<float> kernelMatrix,
        Configuration configuration,
        bool preserveAlpha)
    {
        this.bounds = bounds;
        this.targetPixels = targetPixels;
        this.sourcePixels = sourcePixels;
        this.map = map;
        this.kernelMatrix = kernelMatrix;
        this.configuration = configuration;
        this.preserveAlpha = preserveAlpha;
    }

    /// <inheritdoc/>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Invoke(int y, Span<Vector4> span)
    {
        if (!this.preserveAlpha)
        {
            this.Convolve4(y, span);
            return;
        }

        this.Convolve3(y, span);
    }

    /// <summary>
    /// Convolves row <paramref name="y"/>, then replaces the alpha of every result with
    /// the alpha of the source pixel at the same position in row <paramref name="y"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void Convolve3(int y, Span<Vector4> span)
    {
        int left = this.bounds.X;
        int width = this.bounds.Width;

        // Leading part of the scratch span holds converted source pixels, the rest the accumulators.
        Span<Vector4> sourceBuffer = span.Slice(0, width);
        Span<Vector4> targetBuffer = span.Slice(width);

        var state = new ConvolutionState(in this.kernelMatrix, this.map);
        ref int rowOffsets = ref state.GetSampleRow(y - this.bounds.Y);

        // Accumulators have to start from zero on every row.
        targetBuffer.Clear();
        ref Vector4 targetRef = ref MemoryMarshal.GetReference(targetBuffer);

        ReadOnlyKernel kernel = state.Kernel;
        int kernelRows = kernel.Rows;
        int kernelColumns = kernel.Columns;
        Span<TPixel> pixelRow;

        for (int kernelY = 0; kernelY < kernelRows; kernelY++)
        {
            // Load the sample row mapped to this kernel row into the source buffer.
            int sourceY = Unsafe.Add(ref rowOffsets, kernelY);
            pixelRow = this.sourcePixels.GetRowSpan(sourceY).Slice(left, width);
            PixelOperations<TPixel>.Instance.ToVector4(this.configuration, pixelRow, sourceBuffer);

            ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceBuffer);
            int length = sourceBuffer.Length;

            for (int column = 0; column < length; column++)
            {
                ref int columnOffsets = ref state.GetSampleColumn(column);
                ref Vector4 accumulator = ref Unsafe.Add(ref targetRef, column);

                for (int kernelX = 0; kernelX < kernelColumns; kernelX++)
                {
                    // Rebase the sampled column by the bounds origin to index the row-local buffer.
                    int sourceX = Unsafe.Add(ref columnOffsets, kernelX) - left;
                    float weight = kernel[kernelY, kernelX];
                    accumulator += weight * Unsafe.Add(ref sourceRef, sourceX);
                }
            }
        }

        // Reload the row being processed and carry its alpha over to the result.
        pixelRow = this.sourcePixels.GetRowSpan(y).Slice(left, width);
        PixelOperations<TPixel>.Instance.ToVector4(this.configuration, pixelRow, sourceBuffer);

        ref Vector4 alphaRef = ref MemoryMarshal.GetReference(sourceBuffer);
        int pixelCount = pixelRow.Length;
        for (int column = 0; column < pixelCount; column++)
        {
            Unsafe.Add(ref targetRef, column).W = Unsafe.Add(ref alphaRef, column).W;
        }

        Span<TPixel> destinationRow = this.targetPixels.GetRowSpan(y).Slice(left, width);
        PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetBuffer, destinationRow);
    }

    /// <summary>
    /// Convolves row <paramref name="y"/> across all four vector components, premultiplying
    /// the samples before accumulation and reversing that on the result.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void Convolve4(int y, Span<Vector4> span)
    {
        int left = this.bounds.X;
        int width = this.bounds.Width;

        // Leading part of the scratch span holds converted source pixels, the rest the accumulators.
        Span<Vector4> sourceBuffer = span.Slice(0, width);
        Span<Vector4> targetBuffer = span.Slice(width);

        var state = new ConvolutionState(in this.kernelMatrix, this.map);
        ref int rowOffsets = ref state.GetSampleRow(y - this.bounds.Y);

        // Accumulators have to start from zero on every row.
        targetBuffer.Clear();
        ref Vector4 targetRef = ref MemoryMarshal.GetReference(targetBuffer);

        ReadOnlyKernel kernel = state.Kernel;
        int kernelRows = kernel.Rows;
        int kernelColumns = kernel.Columns;

        for (int kernelY = 0; kernelY < kernelRows; kernelY++)
        {
            // Load the sample row mapped to this kernel row and premultiply it.
            int sourceY = Unsafe.Add(ref rowOffsets, kernelY);
            Span<TPixel> pixelRow = this.sourcePixels.GetRowSpan(sourceY).Slice(left, width);
            PixelOperations<TPixel>.Instance.ToVector4(this.configuration, pixelRow, sourceBuffer);
            Numerics.Premultiply(sourceBuffer);

            ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceBuffer);
            int length = sourceBuffer.Length;

            for (int column = 0; column < length; column++)
            {
                ref int columnOffsets = ref state.GetSampleColumn(column);
                ref Vector4 accumulator = ref Unsafe.Add(ref targetRef, column);

                for (int kernelX = 0; kernelX < kernelColumns; kernelX++)
                {
                    // Rebase the sampled column by the bounds origin to index the row-local buffer.
                    int sourceX = Unsafe.Add(ref columnOffsets, kernelX) - left;
                    float weight = kernel[kernelY, kernelX];
                    accumulator += weight * Unsafe.Add(ref sourceRef, sourceX);
                }
            }
        }

        Numerics.UnPremultiply(targetBuffer);

        Span<TPixel> destinationRow = this.targetPixels.GetRowSpan(y).Slice(left, width);
        PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, targetBuffer, destinationRow);
    }
}
}

14
src/ImageSharp/Processing/Processors/Convolution/GaussianBlurProcessor{TPixel}.cs

@ -27,24 +27,18 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
: base(configuration, source, sourceRectangle)
{
int kernelSize = (definition.Radius * 2) + 1;
this.KernelX = ConvolutionProcessorHelpers.CreateGaussianBlurKernel(kernelSize, definition.Sigma);
this.KernelY = this.KernelX.Transpose();
this.Kernel = ConvolutionProcessorHelpers.CreateGaussianBlurKernel(kernelSize, definition.Sigma);
}
/// <summary>
/// Gets the horizontal gradient operator.
/// Gets the 1D convolution kernel.
/// </summary>
public DenseMatrix<float> KernelX { get; }
/// <summary>
/// Gets the vertical gradient operator.
/// </summary>
public DenseMatrix<float> KernelY { get; }
public float[] Kernel { get; }
/// <inheritdoc/>
protected override void OnFrameApply(ImageFrame<TPixel> source)
{
using var processor = new Convolution2PassProcessor<TPixel>(this.Configuration, this.KernelX, this.KernelY, false, this.Source, this.SourceRectangle);
using var processor = new Convolution2PassProcessor<TPixel>(this.Configuration, this.Kernel, false, this.Source, this.SourceRectangle);
processor.Apply(source);
}

14
src/ImageSharp/Processing/Processors/Convolution/GaussianSharpenProcessor{TPixel}.cs

@ -27,24 +27,18 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
: base(configuration, source, sourceRectangle)
{
int kernelSize = (definition.Radius * 2) + 1;
this.KernelX = ConvolutionProcessorHelpers.CreateGaussianSharpenKernel(kernelSize, definition.Sigma);
this.KernelY = this.KernelX.Transpose();
this.Kernel = ConvolutionProcessorHelpers.CreateGaussianSharpenKernel(kernelSize, definition.Sigma);
}
/// <summary>
/// Gets the horizontal gradient operator.
/// Gets the 1D convolution kernel.
/// </summary>
public DenseMatrix<float> KernelX { get; }
/// <summary>
/// Gets the vertical gradient operator.
/// </summary>
public DenseMatrix<float> KernelY { get; }
public float[] Kernel { get; }
/// <inheritdoc/>
protected override void OnFrameApply(ImageFrame<TPixel> source)
{
using var processor = new Convolution2PassProcessor<TPixel>(this.Configuration, this.KernelX, this.KernelY, false, this.Source, this.SourceRectangle);
using var processor = new Convolution2PassProcessor<TPixel>(this.Configuration, this.Kernel, false, this.Source, this.SourceRectangle);
processor.Apply(source);
}

Loading…
Cancel
Save