Browse Source

More codegen improvements to bokeh blur

js/color-alpha-handling
Sergio Pedri 5 years ago
parent
commit
a5bbe95e95
  1. 26
      src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs
  2. 21
      src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs

26
src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs

@ -134,23 +134,29 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
// The target buffer is zeroed initially and then it accumulates the results // The target buffer is zeroed initially and then it accumulates the results
// of each partial convolution, so we don't have to clear it here as well // of each partial convolution, so we don't have to clear it here as well
ref Vector4 targetBase = ref this.targetValues.GetElementUnsafe(boundsX, y); ref Vector4 targetBase = ref this.targetValues.GetElementUnsafe(boundsX, y);
ref Complex64 kernelBase = ref this.kernel[0]; ref Complex64 kernelStart = ref this.kernel[0];
ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize);
for (int kY = 0; kY < kernelSize; kY++) while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd))
{ {
// Get the precalculated source sample row for this kernel row and copy to our buffer // Get the precalculated source sample row for this kernel row and copy to our buffer
int sampleY = Unsafe.Add(ref sampleRowBase, kY); ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleRowBase);
ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleY); ref ComplexVector4 sourceEnd = ref Unsafe.Add(ref sourceBase, boundsWidth);
Complex64 factor = Unsafe.Add(ref kernelBase, kY); ref Vector4 targetStart = ref targetBase;
Complex64 factor = kernelStart;
for (int x = 0; x < boundsWidth; x++) while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd))
{ {
ref Vector4 target = ref Unsafe.Add(ref targetBase, x); ComplexVector4 partial = factor * sourceBase;
ComplexVector4 sample = Unsafe.Add(ref sourceBase, x);
ComplexVector4 partial = factor * sample;
target += partial.WeightedSum(this.z, this.w); targetStart += partial.WeightedSum(this.z, this.w);
sourceBase = ref Unsafe.Add(ref sourceBase, 1);
targetStart = ref Unsafe.Add(ref targetStart, 1);
} }
kernelStart = ref Unsafe.Add(ref kernelStart, 1);
sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1);
} }
} }
} }

21
src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs

@ -233,32 +233,37 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution
// Clear the target buffer for each row run // Clear the target buffer for each row run
Span<ComplexVector4> targetBuffer = this.targetValues.GetRowSpan(y); Span<ComplexVector4> targetBuffer = this.targetValues.GetRowSpan(y);
targetBuffer.Clear(); targetBuffer.Clear();
ref ComplexVector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);
// Execute the bulk pixel format conversion for the current row // Execute the bulk pixel format conversion for the current row
Span<TPixel> sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); Span<TPixel> sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, span); PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, span);
ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span);
ref ComplexVector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer);
ref ComplexVector4 targetEnd = ref Unsafe.Add(ref targetStart, span.Length);
ref Complex64 kernelBase = ref this.kernel[0]; ref Complex64 kernelBase = ref this.kernel[0];
ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize);
ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan());
for (int x = 0; x < span.Length; x++) while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd))
{ {
ref ComplexVector4 target = ref Unsafe.Add(ref targetBase, x); ref Complex64 kernelStart = ref kernelBase;
ref int sampleColumnStart = ref sampleColumnBase;
for (int kX = 0; kX < kernelSize; kX++) while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd))
{ {
int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX);
Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
Complex64 factor = Unsafe.Add(ref kernelBase, kX);
target.Sum(factor * sample); targetStart.Sum(kernelStart * sample);
kernelStart = ref Unsafe.Add(ref kernelStart, 1);
sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1);
} }
// Shift the base column sampling reference by one row at the end of each outer // Shift the base column sampling reference by one row at the end of each outer
// iteration so that the inner tight loop indexing can skip the multiplication // iteration so that the inner tight loop indexing can skip the multiplication
sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize); sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize);
targetStart = ref Unsafe.Add(ref targetStart, 1);
} }
} }
} }

Loading…
Cancel
Save