From 95f1f5a60a5778c62280783b79e64a9b030ac586 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Sun, 8 Nov 2020 13:06:28 +0100 Subject: [PATCH 01/19] Add unowned Image.WrapMemory(void*) overloads --- src/ImageSharp/Image.WrapMemory.cs | 66 +++++++++++++++++++ src/ImageSharp/Memory/ByteMemoryManager{T}.cs | 1 + .../Memory/UnmanagedMemoryManager{T}.cs | 60 +++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 src/ImageSharp/Memory/UnmanagedMemoryManager{T}.cs diff --git a/src/ImageSharp/Image.WrapMemory.cs b/src/ImageSharp/Image.WrapMemory.cs index d89c44dc5..b080c7888 100644 --- a/src/ImageSharp/Image.WrapMemory.cs +++ b/src/ImageSharp/Image.WrapMemory.cs @@ -220,5 +220,71 @@ namespace SixLabors.ImageSharp int height) where TPixel : unmanaged, IPixel => WrapMemory(Configuration.Default, byteMemory, width, height); + + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, + /// allowing to view/manipulate it as an instance. + /// + /// The pixel type + /// The + /// The pointer to the target memory buffer to wrap. + /// The width of the memory image. + /// The height of the memory image. + /// The . + /// The configuration is null. + /// The metadata is null. + /// An instance + public static unsafe Image WrapMemory( + Configuration configuration, + void* pointer, + int width, + int height, + ImageMetadata metadata) + where TPixel : unmanaged, IPixel + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.NotNull(metadata, nameof(metadata)); + + var memoryManager = new UnmanagedMemoryManager(pointer, width * height); + + var memorySource = MemoryGroup.Wrap(memoryManager.Memory); + return new Image(configuration, memorySource, width, height, metadata); + } + + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, + /// allowing to view/manipulate it as an instance. + /// + /// The pixel type + /// The + /// The pointer to the target memory buffer to wrap. + /// The width of the memory image. + /// The height of the memory image. + /// The configuration is null. + /// An instance. + public static unsafe Image WrapMemory( + Configuration configuration, + void* pointer, + int width, + int height) + where TPixel : unmanaged, IPixel + => WrapMemory(configuration, pointer, width, height, new ImageMetadata()); + + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, + /// allowing to view/manipulate it as an instance. + /// The memory is being observed, the caller remains responsible for managing it's lifecycle. + /// + /// The pixel type. + /// The pointer to the target memory buffer to wrap. + /// The width of the memory image. + /// The height of the memory image. + /// An instance. + public static unsafe Image WrapMemory( + void* pointer, + int width, + int height) + where TPixel : unmanaged, IPixel + => WrapMemory(Configuration.Default, pointer, width, height); } } diff --git a/src/ImageSharp/Memory/ByteMemoryManager{T}.cs b/src/ImageSharp/Memory/ByteMemoryManager{T}.cs index 223709df6..173163958 100644 --- a/src/ImageSharp/Memory/ByteMemoryManager{T}.cs +++ b/src/ImageSharp/Memory/ByteMemoryManager{T}.cs @@ -1,5 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. + using System; using System.Buffers; using System.Runtime.CompilerServices; diff --git a/src/ImageSharp/Memory/UnmanagedMemoryManager{T}.cs b/src/ImageSharp/Memory/UnmanagedMemoryManager{T}.cs new file mode 100644 index 000000000..58eaee320 --- /dev/null +++ b/src/ImageSharp/Memory/UnmanagedMemoryManager{T}.cs @@ -0,0 +1,60 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Buffers; + +namespace SixLabors.ImageSharp.Memory +{ + /// + /// A custom that can wrap a rawpointer to a buffer of a specified type. + /// + /// The value type to use when casting the wrapped instance. + /// This manager doesn't own the memory buffer that it points to. + internal sealed unsafe class UnmanagedMemoryManager : MemoryManager + where T : unmanaged + { + /// + /// The pointer to the memory buffer. + /// + private readonly void* pointer; + + /// + /// The length of the memory area. + /// + private readonly int length; + + /// + /// Initializes a new instance of the class. + /// + /// The pointer to the memory buffer. + /// The length of the memory area. + public UnmanagedMemoryManager(void* pointer, int length) + { + this.pointer = pointer; + this.length = length; + } + + /// + protected override void Dispose(bool disposing) + { + } + + /// + public override Span GetSpan() + { + return new Span(this.pointer, this.length); + } + + /// + public override MemoryHandle Pin(int elementIndex = 0) + { + return new MemoryHandle(((T*)this.pointer) + elementIndex); + } + + /// + public override void Unpin() + { + } + } +} From 04b5978e0968867fa0cbc398fa47acbd19b8ebb9 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Sun, 8 Nov 2020 13:12:28 +0100 Subject: [PATCH 02/19] Add pointer null check on construction --- src/ImageSharp/Image.WrapMemory.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ImageSharp/Image.WrapMemory.cs b/src/ImageSharp/Image.WrapMemory.cs index b080c7888..dff888c13 100644 --- a/src/ImageSharp/Image.WrapMemory.cs +++ b/src/ImageSharp/Image.WrapMemory.cs @@ -242,6 +242,7 @@ namespace SixLabors.ImageSharp ImageMetadata metadata) where TPixel : unmanaged, IPixel { + Guard.IsFalse(pointer == null, nameof(pointer), "Pointer must be not null"); Guard.NotNull(configuration, nameof(configuration)); Guard.NotNull(metadata, nameof(metadata)); From 941534d13e4220d71d1d61262f461f9518f32265 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Sun, 8 Nov 2020 13:19:00 +0100 Subject: [PATCH 03/19] Add unit tests for WrapMemory(void*) overloads --- .../Image/ImageTests.WrapMemory.cs | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs b/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs index 7dc7dbb30..637b4d817 100644 --- a/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs +++ b/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs @@ -282,6 +282,76 @@ namespace SixLabors.ImageSharp.Tests } } + [Fact] + public unsafe void WrapMemory_FromPointer_CreatedImageIsCorrect() + { + var cfg = Configuration.CreateDefaultInstance(); + var metaData = new ImageMetadata(); + + var array = new Rgba32[25]; + + fixed (void* ptr = array) + { + using (var image = Image.WrapMemory(cfg, ptr, 5, 5, metaData)) + { + Assert.True(image.TryGetSinglePixelSpan(out Span imageSpan)); + ref Rgba32 pixel0 = ref imageSpan[0]; + Assert.True(Unsafe.AreSame(ref array[0], ref pixel0)); + ref Rgba32 pixel_1 = ref imageSpan[imageSpan.Length - 1]; + Assert.True(Unsafe.AreSame(ref array[array.Length - 1], ref pixel_1)); + + Assert.Equal(cfg, image.GetConfiguration()); + Assert.Equal(metaData, image.Metadata); + } + } + } + + [Fact] + public unsafe void WrapSystemDrawingBitmap_FromPointer_WhenObserved() + { + if (ShouldSkipBitmapTest) + { + return; + } + + using (var bmp = new Bitmap(51, 23)) + { + using (var memoryManager = new BitmapMemoryManager(bmp)) + { + Memory pixelMemory = memoryManager.Memory; + Bgra32 bg = Color.Red; + Bgra32 fg = Color.Green; + + fixed (void* p = pixelMemory.Span) + { + using (var image = Image.WrapMemory(p, bmp.Width, bmp.Height)) + { + Span pixelSpan = pixelMemory.Span; + Span imageSpan = image.GetRootFramePixelBuffer().GetSingleMemory().Span; + + Assert.Equal(pixelSpan.Length, imageSpan.Length); + Assert.True(Unsafe.AreSame(ref pixelSpan.GetPinnableReference(), ref imageSpan.GetPinnableReference())); + + Assert.True(image.TryGetSinglePixelSpan(out imageSpan)); + imageSpan.Fill(bg); + for (var i = 10; i < 20; i++) + { + image.GetPixelRowSpan(i).Slice(10, 10).Fill(fg); + } + } + + Assert.False(memoryManager.IsDisposed); + } + } + + string fn = System.IO.Path.Combine( + TestEnvironment.ActualOutputDirectoryFullPath, + $"{nameof(this.WrapSystemDrawingBitmap_WhenObserved)}.bmp"); + + bmp.Save(fn, ImageFormat.Bmp); + } + } + [Theory] [InlineData(0, 5, 5)] [InlineData(20, 5, 5)] @@ -333,6 +403,17 @@ namespace SixLabors.ImageSharp.Tests Assert.Throws(() => Image.WrapMemory(memory, height, width)); } + [Theory] + [InlineData(0, 5, 5)] + [InlineData(20, 5, 5)] + [InlineData(26, 5, 5)] + [InlineData(2, 1, 1)] + [InlineData(1023, 32, 32)] + public unsafe void WrapMemory_Pointer_Null(int size, int height, int width) + { + Assert.Throws(() => Image.WrapMemory((void*)null, height, width)); + } + private static bool ShouldSkipBitmapTest => !TestEnvironment.Is64BitProcess || (TestHelpers.ImageSharpBuiltAgainst != "netcoreapp3.1" && TestHelpers.ImageSharpBuiltAgainst != "netcoreapp2.1"); } From 96e714a0d226df57ee8a02edd225254f023834ee Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Sat, 12 Dec 2020 15:15:22 +0100 Subject: [PATCH 04/19] Improved XML docs for Image.WrapMemory APIs --- src/ImageSharp/Image.WrapMemory.cs | 90 +++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Image.WrapMemory.cs b/src/ImageSharp/Image.WrapMemory.cs index dff888c13..778592931 100644 --- a/src/ImageSharp/Image.WrapMemory.cs +++ b/src/ImageSharp/Image.WrapMemory.cs @@ -18,6 +18,14 @@ namespace SixLabors.ImageSharp /// /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, /// allowing to view/manipulate it as an instance. + /// The ownership of the underlying buffer for the input instance is not being transferred to + /// the new instance, meaning that consumers of this method need to make sure the input buffer + /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is not disposed. + /// For instance, if the input instance is one retrieved from an instance + /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still + /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. /// /// The pixel type /// The @@ -47,6 +55,14 @@ namespace SixLabors.ImageSharp /// /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, /// allowing to view/manipulate it as an instance. + /// The ownership of the underlying buffer for the input instance is not being transferred to + /// the new instance, meaning that consumers of this method need to make sure the input buffer + /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is not disposed. + /// For instance, if the input instance is one retrieved from an instance + /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still + /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. /// /// The pixel type /// The @@ -66,7 +82,14 @@ namespace SixLabors.ImageSharp /// /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, /// allowing to view/manipulate it as an instance. - /// The memory is being observed, the caller remains responsible for managing it's lifecycle. + /// The ownership of the underlying buffer for the input instance is not being transferred to + /// the new instance, meaning that consumers of this method need to make sure the input buffer + /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is not disposed. + /// For instance, if the input instance is one retrieved from an instance + /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still + /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. /// /// The pixel type. /// The pixel memory. @@ -156,6 +179,14 @@ namespace SixLabors.ImageSharp /// /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, /// allowing to view/manipulate it as an instance. + /// The ownership of the underlying buffer for the input instance is not being transferred to + /// the new instance, meaning that consumers of this method need to make sure the input buffer + /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is not disposed. + /// For instance, if the input instance is one retrieved from an instance + /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still + /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. /// /// The pixel type /// The @@ -188,6 +219,14 @@ namespace SixLabors.ImageSharp /// /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, /// allowing to view/manipulate it as an instance. + /// The ownership of the underlying buffer for the input instance is not being transferred to + /// the new instance, meaning that consumers of this method need to make sure the input buffer + /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is not disposed. + /// For instance, if the input instance is one retrieved from an instance + /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still + /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. /// /// The pixel type /// The @@ -207,7 +246,14 @@ namespace SixLabors.ImageSharp /// /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, /// allowing to view/manipulate it as an instance. - /// The memory is being observed, the caller remains responsible for managing it's lifecycle. + /// The ownership of the underlying buffer for the input instance is not being transferred to + /// the new instance, meaning that consumers of this method need to make sure the input buffer + /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is not disposed. + /// For instance, if the input instance is one retrieved from an instance + /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still + /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. /// /// The pixel type. /// The byte memory representing the pixel data. @@ -224,6 +270,19 @@ namespace SixLabors.ImageSharp /// /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, /// allowing to view/manipulate it as an instance. + /// This method relies on callers to carefully manage the target memory area being referenced by the + /// pointer, and it requires that the lifetime of such a memory area is at least equal to that of the returned + /// instance. For instance, if the input pointer references an unmanaged memory area, + /// callers need to ensure that that memory area is not freed as long as the returned is + /// in use and not disposed. The same applies if the input memory area points to a pinned managed object, as callers + /// need to ensure that object will remain pinned as long as the instance is in use. + /// Failing to do so constitutes undefined behavior and will likely lead to memory corruption and runtime crashes. + /// Note that if you have a or an array (which can be cast to ) of + /// either or values, it is highly recommended to use one of the other + /// available overloads of this method instead (such as + /// or , to make the resulting code less error + /// prone and avoid having to pin the underlying memory buffer in use. This method is primarily meant to be used when + /// doing interop or working with buffers that are located in unmanaged memory. /// /// The pixel type /// The @@ -255,6 +314,19 @@ namespace SixLabors.ImageSharp /// /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, /// allowing to view/manipulate it as an instance. + /// This method relies on callers to carefully manage the target memory area being referenced by the + /// pointer, and it requires that the lifetime of such a memory area is at least equal to that of the returned + /// instance. For instance, if the input pointer references an unmanaged memory area, + /// callers need to ensure that that memory area is not freed as long as the returned is + /// in use and not disposed. The same applies if the input memory area points to a pinned managed object, as callers + /// need to ensure that object will remain pinned as long as the instance is in use. + /// Failing to do so constitutes undefined behavior and will likely lead to memory corruption and runtime crashes. + /// Note that if you have a or an array (which can be cast to ) of + /// either or values, it is highly recommended to use one of the other + /// available overloads of this method instead (such as + /// or , to make the resulting code less error + /// prone and avoid having to pin the underlying memory buffer in use. This method is primarily meant to be used when + /// doing interop or working with buffers that are located in unmanaged memory. /// /// The pixel type /// The @@ -274,7 +346,19 @@ namespace SixLabors.ImageSharp /// /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, /// allowing to view/manipulate it as an instance. - /// The memory is being observed, the caller remains responsible for managing it's lifecycle. + /// This method relies on callers to carefully manage the target memory area being referenced by the + /// pointer, and it requires that the lifetime of such a memory area is at least equal to that of the returned + /// instance. For instance, if the input pointer references an unmanaged memory area, + /// callers need to ensure that that memory area is not freed as long as the returned is + /// in use and not disposed. The same applies if the input memory area points to a pinned managed object, as callers + /// need to ensure that object will remain pinned as long as the instance is in use. + /// Failing to do so constitutes undefined behavior and will likely lead to memory corruption and runtime crashes. + /// Note that if you have a or an array (which can be cast to ) of + /// either or values, it is highly recommended to use one of the other + /// available overloads of this method instead (such as + /// or , to make the resulting code less error + /// prone and avoid having to pin the underlying memory buffer in use. This method is primarily meant to be used when + /// doing interop or working with buffers that are located in unmanaged memory. /// /// The pixel type. /// The pointer to the target memory buffer to wrap. From 4aa8bc6e57d2bb19bdbddf31c8a8c1d91a9b297d Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Mon, 14 Dec 2020 18:13:21 +0100 Subject: [PATCH 05/19] Improve formatting for XML docs --- src/ImageSharp/Image.WrapMemory.cs | 198 ++++++++++++++++++----------- 1 file changed, 126 insertions(+), 72 deletions(-) diff --git a/src/ImageSharp/Image.WrapMemory.cs b/src/ImageSharp/Image.WrapMemory.cs index 778592931..383f64396 100644 --- a/src/ImageSharp/Image.WrapMemory.cs +++ b/src/ImageSharp/Image.WrapMemory.cs @@ -16,16 +16,22 @@ namespace SixLabors.ImageSharp public abstract partial class Image { /// - /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, - /// allowing to view/manipulate it as an instance. - /// The ownership of the underlying buffer for the input instance is not being transferred to - /// the new instance, meaning that consumers of this method need to make sure the input buffer - /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was - /// created), or that the owning object is not disposed until the returned is not disposed. - /// For instance, if the input instance is one retrieved from an instance + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels allowing viewing/manipulation as + /// an instance. + /// + /// + /// Please note: using this method does not transfer the ownership of the underlying buffer of the input + /// to the new instance. This means that consumers of this method must ensure that the input buffer + /// is either self-contained, (for example, a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is disposed. + /// + /// + /// If the input instance is one retrieved from an instance /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other - /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately. + /// /// /// The pixel type /// The @@ -53,16 +59,22 @@ namespace SixLabors.ImageSharp } /// - /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, - /// allowing to view/manipulate it as an instance. - /// The ownership of the underlying buffer for the input instance is not being transferred to - /// the new instance, meaning that consumers of this method need to make sure the input buffer - /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was - /// created), or that the owning object is not disposed until the returned is not disposed. - /// For instance, if the input instance is one retrieved from an instance + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels allowing viewing/manipulation as + /// an instance. + /// + /// + /// Please note: using this method does not transfer the ownership of the underlying buffer of the input + /// to the new instance. This means that consumers of this method must ensure that the input buffer + /// is either self-contained, (for example, a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is disposed. + /// + /// + /// If the input instance is one retrieved from an instance /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other - /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately. + /// /// /// The pixel type /// The @@ -80,16 +92,22 @@ namespace SixLabors.ImageSharp => WrapMemory(configuration, pixelMemory, width, height, new ImageMetadata()); /// - /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, - /// allowing to view/manipulate it as an instance. - /// The ownership of the underlying buffer for the input instance is not being transferred to - /// the new instance, meaning that consumers of this method need to make sure the input buffer - /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was - /// created), or that the owning object is not disposed until the returned is not disposed. - /// For instance, if the input instance is one retrieved from an instance + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels allowing viewing/manipulation as + /// an instance. + /// + /// + /// Please note: using this method does not transfer the ownership of the underlying buffer of the input + /// to the new instance. This means that consumers of this method must ensure that the input buffer + /// is either self-contained, (for example, a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is disposed. + /// + /// + /// If the input instance is one retrieved from an instance /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other - /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately. + /// /// /// The pixel type. /// The pixel memory. @@ -177,16 +195,22 @@ namespace SixLabors.ImageSharp => WrapMemory(Configuration.Default, pixelMemoryOwner, width, height); /// - /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, - /// allowing to view/manipulate it as an instance. - /// The ownership of the underlying buffer for the input instance is not being transferred to - /// the new instance, meaning that consumers of this method need to make sure the input buffer - /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was - /// created), or that the owning object is not disposed until the returned is not disposed. - /// For instance, if the input instance is one retrieved from an instance + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels allowing viewing/manipulation as + /// an instance. + /// + /// + /// Please note: using this method does not transfer the ownership of the underlying buffer of the input + /// to the new instance. This means that consumers of this method must ensure that the input buffer + /// is either self-contained, (for example, a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is disposed. + /// + /// + /// If the input instance is one retrieved from an instance /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other - /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately. + /// /// /// The pixel type /// The @@ -217,16 +241,22 @@ namespace SixLabors.ImageSharp } /// - /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, - /// allowing to view/manipulate it as an instance. - /// The ownership of the underlying buffer for the input instance is not being transferred to - /// the new instance, meaning that consumers of this method need to make sure the input buffer - /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was - /// created), or that the owning object is not disposed until the returned is not disposed. - /// For instance, if the input instance is one retrieved from an instance + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels allowing viewing/manipulation as + /// an instance. + /// + /// + /// Please note: using this method does not transfer the ownership of the underlying buffer of the input + /// to the new instance. This means that consumers of this method must ensure that the input buffer + /// is either self-contained, (for example, a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is disposed. + /// + /// + /// If the input instance is one retrieved from an instance /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other - /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately. + /// /// /// The pixel type /// The @@ -244,16 +274,22 @@ namespace SixLabors.ImageSharp => WrapMemory(configuration, byteMemory, width, height, new ImageMetadata()); /// - /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, - /// allowing to view/manipulate it as an instance. - /// The ownership of the underlying buffer for the input instance is not being transferred to - /// the new instance, meaning that consumers of this method need to make sure the input buffer - /// is either self-contianed (for instance, this is the case for a instance wrapping a new array that was - /// created), or that the owning object is not disposed until the returned is not disposed. - /// For instance, if the input instance is one retrieved from an instance + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels allowing viewing/manipulation as + /// an instance. + /// + /// + /// Please note: using this method does not transfer the ownership of the underlying buffer of the input + /// to the new instance. This means that consumers of this method must ensure that the input buffer + /// is either self-contained, (for example, a instance wrapping a new array that was + /// created), or that the owning object is not disposed until the returned is disposed. + /// + /// + /// If the input instance is one retrieved from an instance /// rented from a memory pool (such as ), and that owning instance is disposed while the image is still /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other - /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers properly. + /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately. + /// /// /// The pixel type. /// The byte memory representing the pixel data. @@ -268,21 +304,27 @@ namespace SixLabors.ImageSharp => WrapMemory(Configuration.Default, byteMemory, width, height); /// - /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, - /// allowing to view/manipulate it as an instance. - /// This method relies on callers to carefully manage the target memory area being referenced by the - /// pointer, and it requires that the lifetime of such a memory area is at least equal to that of the returned - /// instance. For instance, if the input pointer references an unmanaged memory area, - /// callers need to ensure that that memory area is not freed as long as the returned is + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels allowing viewing/manipulation as + /// an instance. + /// + /// + /// Please note: this method relies on callers to carefully manage the target memory area being referenced by the + /// pointer and that the lifetime of such a memory area is at least equal to that of the returned + /// instance. For example, if the input pointer references an unmanaged memory area, + /// callers must ensure that the memory area is not freed as long as the returned is /// in use and not disposed. The same applies if the input memory area points to a pinned managed object, as callers - /// need to ensure that object will remain pinned as long as the instance is in use. + /// must ensure that objects will remain pinned as long as the instance is in use. /// Failing to do so constitutes undefined behavior and will likely lead to memory corruption and runtime crashes. - /// Note that if you have a or an array (which can be cast to ) of + /// + /// + /// Note also that if you have a or an array (which can be cast to ) of /// either or values, it is highly recommended to use one of the other /// available overloads of this method instead (such as /// or , to make the resulting code less error /// prone and avoid having to pin the underlying memory buffer in use. This method is primarily meant to be used when /// doing interop or working with buffers that are located in unmanaged memory. + /// /// /// The pixel type /// The @@ -312,21 +354,27 @@ namespace SixLabors.ImageSharp } /// - /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, - /// allowing to view/manipulate it as an instance. - /// This method relies on callers to carefully manage the target memory area being referenced by the - /// pointer, and it requires that the lifetime of such a memory area is at least equal to that of the returned - /// instance. For instance, if the input pointer references an unmanaged memory area, - /// callers need to ensure that that memory area is not freed as long as the returned is + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels allowing viewing/manipulation as + /// an instance. + /// + /// + /// Please note: this method relies on callers to carefully manage the target memory area being referenced by the + /// pointer and that the lifetime of such a memory area is at least equal to that of the returned + /// instance. For example, if the input pointer references an unmanaged memory area, + /// callers must ensure that the memory area is not freed as long as the returned is /// in use and not disposed. The same applies if the input memory area points to a pinned managed object, as callers - /// need to ensure that object will remain pinned as long as the instance is in use. + /// must ensure that objects will remain pinned as long as the instance is in use. /// Failing to do so constitutes undefined behavior and will likely lead to memory corruption and runtime crashes. - /// Note that if you have a or an array (which can be cast to ) of + /// + /// + /// Note also that if you have a or an array (which can be cast to ) of /// either or values, it is highly recommended to use one of the other /// available overloads of this method instead (such as /// or , to make the resulting code less error /// prone and avoid having to pin the underlying memory buffer in use. This method is primarily meant to be used when /// doing interop or working with buffers that are located in unmanaged memory. + /// /// /// The pixel type /// The @@ -344,21 +392,27 @@ namespace SixLabors.ImageSharp => WrapMemory(configuration, pointer, width, height, new ImageMetadata()); /// - /// Wraps an existing contiguous memory area of 'width' x 'height' pixels, - /// allowing to view/manipulate it as an instance. - /// This method relies on callers to carefully manage the target memory area being referenced by the - /// pointer, and it requires that the lifetime of such a memory area is at least equal to that of the returned - /// instance. For instance, if the input pointer references an unmanaged memory area, - /// callers need to ensure that that memory area is not freed as long as the returned is + /// + /// Wraps an existing contiguous memory area of 'width' x 'height' pixels allowing viewing/manipulation as + /// an instance. + /// + /// + /// Please note: this method relies on callers to carefully manage the target memory area being referenced by the + /// pointer and that the lifetime of such a memory area is at least equal to that of the returned + /// instance. For example, if the input pointer references an unmanaged memory area, + /// callers must ensure that the memory area is not freed as long as the returned is /// in use and not disposed. The same applies if the input memory area points to a pinned managed object, as callers - /// need to ensure that object will remain pinned as long as the instance is in use. + /// must ensure that objects will remain pinned as long as the instance is in use. /// Failing to do so constitutes undefined behavior and will likely lead to memory corruption and runtime crashes. - /// Note that if you have a or an array (which can be cast to ) of + /// + /// + /// Note also that if you have a or an array (which can be cast to ) of /// either or values, it is highly recommended to use one of the other /// available overloads of this method instead (such as /// or , to make the resulting code less error /// prone and avoid having to pin the underlying memory buffer in use. This method is primarily meant to be used when /// doing interop or working with buffers that are located in unmanaged memory. + /// /// /// The pixel type. /// The pointer to the target memory buffer to wrap. From 077aedc81a24b0136b67ade7ab6da581a6c0a968 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Mon, 14 Dec 2020 23:05:13 +0100 Subject: [PATCH 06/19] Fix inaccurate test method name Co-authored-by: Anton Firszov --- tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs b/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs index 637b4d817..02a838180 100644 --- a/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs +++ b/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs @@ -307,7 +307,7 @@ namespace SixLabors.ImageSharp.Tests } [Fact] - public unsafe void WrapSystemDrawingBitmap_FromPointer_WhenObserved() + public unsafe void WrapSystemDrawingBitmap_FromPointer() { if (ShouldSkipBitmapTest) { From b62d3ec863b96d82dde00ac90de9b7a3e2046254 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 18:35:37 +0100 Subject: [PATCH 07/19] Port horizontal convolution processor, remove Y loop --- .../Convolution2PassProcessor{TPixel}.cs | 149 +++++++++++++++++- 1 file changed, 147 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 16ce0fdd7..ba4e0a6ad 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -1,7 +1,10 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -69,7 +72,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution mapX.BuildSamplingOffsetMap(this.KernelX, interest); // Horizontal convolution - var horizontalOperation = new ConvolutionRowOperation( + var horizontalOperation = new HorizontalConvolutionRowOperation( interest, firstPassPixels, source.PixelBuffer, @@ -78,7 +81,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows, Vector4>( + ParallelRowIterator.IterateRows( this.Configuration, operationBounds, in horizontalOperation); @@ -104,5 +107,147 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution in verticalOperation); } } + + /// + /// A implementing the logic for the horizontal 1D convolution. + /// + internal readonly struct HorizontalConvolutionRowOperation : IRowOperation + { + private readonly Rectangle bounds; + private readonly Buffer2D targetPixels; + private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; + private readonly DenseMatrix kernelMatrix; + private readonly Configuration configuration; + private readonly bool preserveAlpha; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public HorizontalConvolutionRowOperation( + Rectangle bounds, + Buffer2D targetPixels, + Buffer2D sourcePixels, + KernelSamplingMap map, + DenseMatrix kernelMatrix, + Configuration configuration, + bool preserveAlpha) + { + this.bounds = bounds; + this.targetPixels = targetPixels; + this.sourcePixels = sourcePixels; + this.map = map; + this.kernelMatrix = kernelMatrix; + this.configuration = configuration; + this.preserveAlpha = preserveAlpha; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Invoke(int y, Span span) + { + if (this.preserveAlpha) + { + this.Convolve3(y, span); + } + else + { + this.Convolve4(y, span); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve3(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + // Get the precalculated source sample row for this kernel row and copy to our buffer. + ReadOnlyKernel kernel = state.Kernel; + int sampleY = Unsafe.Add(ref sampleRowBase, 0); + Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + target += kernel[0, kX] * sample; + } + } + + // Now we need to copy the original alpha values from the source row. + sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; + } + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve4(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + // Get the precalculated source sample row for this kernel row and copy to our buffer. + ReadOnlyKernel kernel = state.Kernel; + int sampleY = Unsafe.Add(ref sampleRowBase, 0); + Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + + for (int kX = 0; kX < kernel.Columns; kX++) + { + int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + target += kernel[0, kX] * sample; + } + } + + Numerics.UnPremultiply(targetBuffer); + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); + } + } } } From d41cf15023131743e48d9303991d8d78b9c154fd Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 18:37:56 +0100 Subject: [PATCH 08/19] Port vertical convolution processor, remove X loop --- .../Convolution2PassProcessor{TPixel}.cs | 147 +++++++++++++++++- 1 file changed, 145 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index ba4e0a6ad..7a472a207 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -92,7 +92,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution mapY.BuildSamplingOffsetMap(this.KernelY, interest); // Vertical convolution - var verticalOperation = new ConvolutionRowOperation( + var verticalOperation = new VerticalConvolutionRowOperation( interest, source.PixelBuffer, firstPassPixels, @@ -101,7 +101,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.Configuration, this.PreserveAlpha); - ParallelRowIterator.IterateRows, Vector4>( + ParallelRowIterator.IterateRows( this.Configuration, operationBounds, in verticalOperation); @@ -249,5 +249,148 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); } } + + /// + /// A implementing the logic for the vertical 1D convolution. + /// + internal readonly struct VerticalConvolutionRowOperation : IRowOperation + { + private readonly Rectangle bounds; + private readonly Buffer2D targetPixels; + private readonly Buffer2D sourcePixels; + private readonly KernelSamplingMap map; + private readonly DenseMatrix kernelMatrix; + private readonly Configuration configuration; + private readonly bool preserveAlpha; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public VerticalConvolutionRowOperation( + Rectangle bounds, + Buffer2D targetPixels, + Buffer2D sourcePixels, + KernelSamplingMap map, + DenseMatrix kernelMatrix, + Configuration configuration, + bool preserveAlpha) + { + this.bounds = bounds; + this.targetPixels = targetPixels; + this.sourcePixels = sourcePixels; + this.map = map; + this.kernelMatrix = kernelMatrix; + this.configuration = configuration; + this.preserveAlpha = preserveAlpha; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Invoke(int y, Span span) + { + if (this.preserveAlpha) + { + this.Convolve3(y, span); + } + else + { + this.Convolve4(y, span); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve3(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + ReadOnlyKernel kernel = state.Kernel; + Span sourceRow; + for (int kY = 0; kY < kernel.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int sampleY = Unsafe.Add(ref sampleRowBase, kY); + sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + int sampleX = Unsafe.Add(ref sampleColumnBase, 0) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + + target += kernel[kY, 0] * sample; + } + } + + // Now we need to copy the original alpha values from the source row. + sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + for (int x = 0; x < sourceRow.Length; x++) + { + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; + } + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Convolve4(int y, Span span) + { + // Span is 2x bounds. + int boundsX = this.bounds.X; + int boundsWidth = this.bounds.Width; + Span sourceBuffer = span.Slice(0, this.bounds.Width); + Span targetBuffer = span.Slice(this.bounds.Width); + + var state = new ConvolutionState(in this.kernelMatrix, this.map); + ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + + // Clear the target buffer for each row run. + targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + + ReadOnlyKernel kernel = state.Kernel; + for (int kY = 0; kY < kernel.Rows; kY++) + { + // Get the precalculated source sample row for this kernel row and copy to our buffer. + int sampleY = Unsafe.Add(ref sampleRowBase, kY); + Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); + + Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + + for (int x = 0; x < sourceBuffer.Length; x++) + { + ref int sampleColumnBase = ref state.GetSampleColumn(x); + ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + int sampleX = Unsafe.Add(ref sampleColumnBase, 0) - boundsX; + Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + + target += kernel[kY, 0] * sample; + } + } + + Numerics.UnPremultiply(targetBuffer); + + Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); + PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); + } + } } } From 9959ba0d0b2092524f415097445442ec00d45b26 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 18:50:34 +0100 Subject: [PATCH 09/19] Remove unnecessary inner loop coordinate sampling --- .../Convolution2PassProcessor{TPixel}.cs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 7a472a207..3b9130f3b 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -164,7 +164,6 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Span targetBuffer = span.Slice(this.bounds.Width); var state = new ConvolutionState(in this.kernelMatrix, this.map); - ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); // Clear the target buffer for each row run. targetBuffer.Clear(); @@ -172,8 +171,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Get the precalculated source sample row for this kernel row and copy to our buffer. ReadOnlyKernel kernel = state.Kernel; - int sampleY = Unsafe.Add(ref sampleRowBase, 0); - Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); @@ -215,7 +213,6 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Span targetBuffer = span.Slice(this.bounds.Width); var state = new ConvolutionState(in this.kernelMatrix, this.map); - ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); // Clear the target buffer for each row run. targetBuffer.Clear(); @@ -223,8 +220,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Get the precalculated source sample row for this kernel row and copy to our buffer. ReadOnlyKernel kernel = state.Kernel; - int sampleY = Unsafe.Add(ref sampleRowBase, 0); - Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); Numerics.Premultiply(sourceBuffer); @@ -325,10 +321,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int x = 0; x < sourceBuffer.Length; x++) { - ref int sampleColumnBase = ref state.GetSampleColumn(x); ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - int sampleX = Unsafe.Add(ref sampleColumnBase, 0) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + Vector4 sample = Unsafe.Add(ref sourceBase, x); target += kernel[kY, 0] * sample; } @@ -377,10 +371,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution for (int x = 0; x < sourceBuffer.Length; x++) { - ref int sampleColumnBase = ref state.GetSampleColumn(x); ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - int sampleX = Unsafe.Add(ref sampleColumnBase, 0) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); + Vector4 sample = Unsafe.Add(ref sourceBase, x); target += kernel[kY, 0] * sample; } From 82d0ba44cfcad086d9eb37b99c4af7f034905b5f Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 18:52:10 +0100 Subject: [PATCH 10/19] Switch to shared sampling map for convolution passes --- .../Convolution2PassProcessor{TPixel}.cs | 74 +++++++++---------- 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 3b9130f3b..c2f3ec59a 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -67,45 +67,41 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // for source and target bulk pixel conversion. var operationBounds = new Rectangle(interest.X, interest.Y, interest.Width * 2, interest.Height); - using (var mapX = new KernelSamplingMap(this.Configuration.MemoryAllocator)) - { - mapX.BuildSamplingOffsetMap(this.KernelX, interest); - - // Horizontal convolution - var horizontalOperation = new HorizontalConvolutionRowOperation( - interest, - firstPassPixels, - source.PixelBuffer, - mapX, - this.KernelX, - this.Configuration, - this.PreserveAlpha); - - ParallelRowIterator.IterateRows( - this.Configuration, - operationBounds, - in horizontalOperation); - } - - using (var mapY = new KernelSamplingMap(this.Configuration.MemoryAllocator)) - { - mapY.BuildSamplingOffsetMap(this.KernelY, interest); - - // Vertical convolution - var verticalOperation = new VerticalConvolutionRowOperation( - interest, - source.PixelBuffer, - firstPassPixels, - mapY, - this.KernelY, - this.Configuration, - this.PreserveAlpha); - - ParallelRowIterator.IterateRows( - this.Configuration, - operationBounds, - in verticalOperation); - } + // We can create a single sampling map with the size as if we were using the non separated 2D kernel + // the two 1D kernels represent, and reuse it across both convolution steps, like in the bokeh blur. + using var mapXY = new KernelSamplingMap(this.Configuration.MemoryAllocator); + + mapXY.BuildSamplingOffsetMap(this.KernelY.Rows, this.KernelX.Columns, interest); + + // Horizontal convolution + var horizontalOperation = new HorizontalConvolutionRowOperation( + interest, + firstPassPixels, + source.PixelBuffer, + mapXY, + this.KernelX, + this.Configuration, + this.PreserveAlpha); + + ParallelRowIterator.IterateRows( + this.Configuration, + operationBounds, + in horizontalOperation); + + // Vertical convolution + var verticalOperation = new VerticalConvolutionRowOperation( + interest, + source.PixelBuffer, + firstPassPixels, + mapXY, + this.KernelY, + this.Configuration, + this.PreserveAlpha); + + ParallelRowIterator.IterateRows( + this.Configuration, + operationBounds, + in verticalOperation); } /// From 9c2ce4105ef7a5f81358d889a854ee65f5ee3846 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 19:13:18 +0100 Subject: [PATCH 11/19] Remove convolution state, more optimizations --- .../Convolution/BokehBlurProcessor.cs | 3 +- .../Convolution2PassProcessor{TPixel}.cs | 60 ++++++++++++------- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs index d4fb27a57..55cef5df5 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs @@ -129,8 +129,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution int boundsWidth = this.bounds.Width; int kernelSize = this.kernel.Length; - Span rowOffsets = this.map.GetRowOffsetSpan(); - ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(rowOffsets), (y - this.bounds.Y) * kernelSize); + ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(this.map.GetRowOffsetSpan()), (y - this.bounds.Y) * kernelSize); // The target buffer is zeroed initially and then it accumulates the results // of each partial convolution, so we don't have to clear it here as well diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index c2f3ec59a..c7f5c94dd 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -156,33 +156,38 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Span is 2x bounds. int boundsX = this.bounds.X; int boundsWidth = this.bounds.Width; + int kernelSize = this.kernelMatrix.Columns; + Span sourceBuffer = span.Slice(0, this.bounds.Width); Span targetBuffer = span.Slice(this.bounds.Width); - var state = new ConvolutionState(in this.kernelMatrix, this.map); - // Clear the target buffer for each row run. targetBuffer.Clear(); ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); // Get the precalculated source sample row for this kernel row and copy to our buffer. - ReadOnlyKernel kernel = state.Kernel; Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + ref float kernelBase = ref this.kernelMatrix[0, 0]; + ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); for (int x = 0; x < sourceBuffer.Length; x++) { - ref int sampleColumnBase = ref state.GetSampleColumn(x); ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - for (int kX = 0; kX < kernel.Columns; kX++) + for (int kX = 0; kX < kernelSize; kX++) { int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - target += kernel[0, kX] * sample; + float factor = Unsafe.Add(ref kernelBase, kX); + + target += factor * sample; } + + // Shift the base column sampling reference by one row + sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize); } // Now we need to copy the original alpha values from the source row. @@ -205,34 +210,39 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Span is 2x bounds. int boundsX = this.bounds.X; int boundsWidth = this.bounds.Width; + int kernelSize = this.kernelMatrix.Columns; + Span sourceBuffer = span.Slice(0, this.bounds.Width); Span targetBuffer = span.Slice(this.bounds.Width); - var state = new ConvolutionState(in this.kernelMatrix, this.map); - // Clear the target buffer for each row run. targetBuffer.Clear(); ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); // Get the precalculated source sample row for this kernel row and copy to our buffer. - ReadOnlyKernel kernel = state.Kernel; Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + ref float kernelBase = ref this.kernelMatrix[0, 0]; + ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); for (int x = 0; x < sourceBuffer.Length; x++) { - ref int sampleColumnBase = ref state.GetSampleColumn(x); ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - for (int kX = 0; kX < kernel.Columns; kX++) + for (int kX = 0; kX < kernelSize; kX++) { int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - target += kernel[0, kX] * sample; + float factor = Unsafe.Add(ref kernelBase, kX); + + target += factor * sample; } + + sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize); } Numerics.UnPremultiply(targetBuffer); @@ -294,33 +304,37 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Span is 2x bounds. int boundsX = this.bounds.X; int boundsWidth = this.bounds.Width; + int kernelSize = this.kernelMatrix.Rows; + Span sourceBuffer = span.Slice(0, this.bounds.Width); Span targetBuffer = span.Slice(this.bounds.Width); - var state = new ConvolutionState(in this.kernelMatrix, this.map); - ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(this.map.GetRowOffsetSpan()), (y - this.bounds.Y) * kernelSize); // Clear the target buffer for each row run. targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + ref float kernelBase = ref this.kernelMatrix[0, 0]; - ReadOnlyKernel kernel = state.Kernel; Span sourceRow; - for (int kY = 0; kY < kernel.Rows; kY++) + for (int kY = 0; kY < kernelSize; kY++) { // Get the precalculated source sample row for this kernel row and copy to our buffer. int sampleY = Unsafe.Add(ref sampleRowBase, kY); sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + float factor = Unsafe.Add(ref kernelBase, kY); for (int x = 0; x < sourceBuffer.Length; x++) { ref Vector4 target = ref Unsafe.Add(ref targetBase, x); Vector4 sample = Unsafe.Add(ref sourceBase, x); - target += kernel[kY, 0] * sample; + target += factor * sample; } } @@ -344,6 +358,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Span is 2x bounds. int boundsX = this.bounds.X; int boundsWidth = this.bounds.Width; + int kernelSize = this.kernelMatrix.Rows; + Span sourceBuffer = span.Slice(0, this.bounds.Width); Span targetBuffer = span.Slice(this.bounds.Width); @@ -352,25 +368,29 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Clear the target buffer for each row run. targetBuffer.Clear(); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + ref float kernelBase = ref this.kernelMatrix[0, 0]; - ReadOnlyKernel kernel = state.Kernel; - for (int kY = 0; kY < kernel.Rows; kY++) + for (int kY = 0; kY < kernelSize; kY++) { // Get the precalculated source sample row for this kernel row and copy to our buffer. int sampleY = Unsafe.Add(ref sampleRowBase, kY); Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); Numerics.Premultiply(sourceBuffer); + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + float factor = Unsafe.Add(ref kernelBase, kY); for (int x = 0; x < sourceBuffer.Length; x++) { ref Vector4 target = ref Unsafe.Add(ref targetBase, x); Vector4 sample = Unsafe.Add(ref sourceBase, x); - target += kernel[kY, 0] * sample; + target += factor * sample; } } From e8bf26546885c9e141db09e6941ceacc9d4e3c77 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 19:20:45 +0100 Subject: [PATCH 12/19] Remove transposed 1D kernels, switch to float[] type --- .../Convolution/BoxBlurProcessor{TPixel}.cs | 23 ++++---- .../Convolution2PassProcessor{TPixel}.cs | 55 ++++++++----------- .../ConvolutionProcessorHelpers.cs | 28 +++++----- .../GaussianBlurProcessor{TPixel}.cs | 14 ++--- .../GaussianSharpenProcessor{TPixel}.cs | 14 ++--- 5 files changed, 54 insertions(+), 80 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/BoxBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/BoxBlurProcessor{TPixel}.cs index 8c5358770..5beadb0ce 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BoxBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BoxBlurProcessor{TPixel}.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Processing.Processors.Convolution @@ -23,24 +24,18 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution : base(configuration, source, sourceRectangle) { int kernelSize = (definition.Radius * 2) + 1; - this.KernelX = CreateBoxKernel(kernelSize); - this.KernelY = this.KernelX.Transpose(); + this.Kernel = CreateBoxKernel(kernelSize); } /// - /// Gets the horizontal gradient operator. + /// Gets the 1D convolution kernel. /// - public DenseMatrix KernelX { get; } - - /// - /// Gets the vertical gradient operator. - /// - public DenseMatrix KernelY { get; } + public float[] Kernel { get; } /// protected override void OnFrameApply(ImageFrame source) { - using var processor = new Convolution2PassProcessor(this.Configuration, this.KernelX, this.KernelY, false, this.Source, this.SourceRectangle); + using var processor = new Convolution2PassProcessor(this.Configuration, this.Kernel, false, this.Source, this.SourceRectangle); processor.Apply(source); } @@ -50,10 +45,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution /// /// The maximum size of the kernel in either direction. /// The . - private static DenseMatrix CreateBoxKernel(int kernelSize) + private static float[] CreateBoxKernel(int kernelSize) { - var kernel = new DenseMatrix(kernelSize, 1); - kernel.Fill(1F / kernelSize); + var kernel = new float[kernelSize]; + + kernel.AsSpan().Fill(1F / kernelSize); + return kernel; } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index c7f5c94dd..9b7ed7580 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -22,34 +22,26 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution /// Initializes a new instance of the class. /// /// The configuration which allows altering default behaviour or extending the library. - /// The horizontal gradient operator. - /// The vertical gradient operator. + /// The 1D convolution kernel. /// Whether the convolution filter is applied to alpha as well as the color channels. /// The source for the current processor instance. /// The source area to process for the current processor instance. public Convolution2PassProcessor( Configuration configuration, - in DenseMatrix kernelX, - in DenseMatrix kernelY, + float[] kernel, bool preserveAlpha, Image source, Rectangle sourceRectangle) : base(configuration, source, sourceRectangle) { - this.KernelX = kernelX; - this.KernelY = kernelY; + this.Kernel = kernel; this.PreserveAlpha = preserveAlpha; } /// - /// Gets the horizontal convolution kernel. + /// Gets the convolution kernel. /// - public DenseMatrix KernelX { get; } - - /// - /// Gets the vertical convolution kernel. - /// - public DenseMatrix KernelY { get; } + public float[] Kernel { get; } /// /// Gets a value indicating whether the convolution filter is applied to alpha as well as the color channels. @@ -71,7 +63,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // the two 1D kernels represent, and reuse it across both convolution steps, like in the bokeh blur. using var mapXY = new KernelSamplingMap(this.Configuration.MemoryAllocator); - mapXY.BuildSamplingOffsetMap(this.KernelY.Rows, this.KernelX.Columns, interest); + mapXY.BuildSamplingOffsetMap(this.Kernel.Length, this.Kernel.Length, interest); // Horizontal convolution var horizontalOperation = new HorizontalConvolutionRowOperation( @@ -79,7 +71,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution firstPassPixels, source.PixelBuffer, mapXY, - this.KernelX, + this.Kernel, this.Configuration, this.PreserveAlpha); @@ -94,7 +86,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution source.PixelBuffer, firstPassPixels, mapXY, - this.KernelY, + this.Kernel, this.Configuration, this.PreserveAlpha); @@ -113,7 +105,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution private readonly Buffer2D targetPixels; private readonly Buffer2D sourcePixels; private readonly KernelSamplingMap map; - private readonly DenseMatrix kernelMatrix; + private readonly float[] kernel; private readonly Configuration configuration; private readonly bool preserveAlpha; @@ -123,7 +115,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Buffer2D targetPixels, Buffer2D sourcePixels, KernelSamplingMap map, - DenseMatrix kernelMatrix, + float[] kernel, Configuration configuration, bool preserveAlpha) { @@ -131,7 +123,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.targetPixels = targetPixels; this.sourcePixels = sourcePixels; this.map = map; - this.kernelMatrix = kernelMatrix; + this.kernel = kernel; this.configuration = configuration; this.preserveAlpha = preserveAlpha; } @@ -156,7 +148,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Span is 2x bounds. int boundsX = this.bounds.X; int boundsWidth = this.bounds.Width; - int kernelSize = this.kernelMatrix.Columns; + int kernelSize = this.kernel.Length; Span sourceBuffer = span.Slice(0, this.bounds.Width); Span targetBuffer = span.Slice(this.bounds.Width); @@ -170,7 +162,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); - ref float kernelBase = ref this.kernelMatrix[0, 0]; + ref float kernelBase = ref this.kernel[0]; ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); for (int x = 0; x < sourceBuffer.Length; x++) @@ -210,7 +202,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Span is 2x bounds. int boundsX = this.bounds.X; int boundsWidth = this.bounds.Width; - int kernelSize = this.kernelMatrix.Columns; + int kernelSize = this.kernel.Length; Span sourceBuffer = span.Slice(0, this.bounds.Width); Span targetBuffer = span.Slice(this.bounds.Width); @@ -226,7 +218,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Numerics.Premultiply(sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); - ref float kernelBase = ref this.kernelMatrix[0, 0]; + ref float kernelBase = ref this.kernel[0]; ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); for (int x = 0; x < sourceBuffer.Length; x++) @@ -261,7 +253,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution private readonly Buffer2D targetPixels; private readonly Buffer2D sourcePixels; private readonly KernelSamplingMap map; - private readonly DenseMatrix kernelMatrix; + private readonly float[] kernel; private readonly Configuration configuration; private readonly bool preserveAlpha; @@ -271,7 +263,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Buffer2D targetPixels, Buffer2D sourcePixels, KernelSamplingMap map, - DenseMatrix kernelMatrix, + float[] kernel, Configuration configuration, bool preserveAlpha) { @@ -279,7 +271,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution this.targetPixels = targetPixels; this.sourcePixels = sourcePixels; this.map = map; - this.kernelMatrix = kernelMatrix; + this.kernel = kernel; this.configuration = configuration; this.preserveAlpha = preserveAlpha; } @@ -304,7 +296,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Span is 2x bounds. int boundsX = this.bounds.X; int boundsWidth = this.bounds.Width; - int kernelSize = this.kernelMatrix.Rows; + int kernelSize = this.kernel.Length; Span sourceBuffer = span.Slice(0, this.bounds.Width); Span targetBuffer = span.Slice(this.bounds.Width); @@ -315,7 +307,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution targetBuffer.Clear(); ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); - ref float kernelBase = ref this.kernelMatrix[0, 0]; + ref float kernelBase = ref this.kernel[0]; Span sourceRow; for (int kY = 0; kY < kernelSize; kY++) @@ -358,19 +350,18 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Span is 2x bounds. int boundsX = this.bounds.X; int boundsWidth = this.bounds.Width; - int kernelSize = this.kernelMatrix.Rows; + int kernelSize = this.kernel.Length; Span sourceBuffer = span.Slice(0, this.bounds.Width); Span targetBuffer = span.Slice(this.bounds.Width); - var state = new ConvolutionState(in this.kernelMatrix, this.map); - ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); + ref int sampleRowBase = ref Unsafe.Add(ref MemoryMarshal.GetReference(this.map.GetRowOffsetSpan()), (y - this.bounds.Y) * kernelSize); // Clear the target buffer for each row run. targetBuffer.Clear(); ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); - ref float kernelBase = ref this.kernelMatrix[0, 0]; + ref float kernelBase = ref this.kernel[0]; for (int kY = 0; kY < kernelSize; kY++) { diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessorHelpers.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessorHelpers.cs index 9844f9956..f93cdabc4 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessorHelpers.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionProcessorHelpers.cs @@ -12,17 +12,15 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution /// See . /// internal static int GetDefaultGaussianRadius(float sigma) - { - return (int)MathF.Ceiling(sigma * 3); - } + => (int)MathF.Ceiling(sigma * 3); /// /// Create a 1 dimensional Gaussian kernel using the Gaussian G(x) function. /// - /// The . - internal static DenseMatrix CreateGaussianBlurKernel(int size, float weight) + /// The convolution kernel. + internal static float[] CreateGaussianBlurKernel(int size, float weight) { - var kernel = new DenseMatrix(size, 1); + var kernel = new float[size]; float sum = 0F; float midpoint = (size - 1) / 2F; @@ -32,13 +30,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution float x = i - midpoint; float gx = Numerics.Gaussian(x, weight); sum += gx; - kernel[0, i] = gx; + kernel[i] = gx; } // Normalize kernel so that the sum of all weights equals 1 for (int i = 0; i < size; i++) { - kernel[0, i] /= sum; + kernel[i] /= sum; } return kernel; @@ -47,10 +45,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution /// /// Create a 1 dimensional Gaussian kernel using the Gaussian G(x) function /// - /// The . - internal static DenseMatrix CreateGaussianSharpenKernel(int size, float weight) + /// The convolution kernel. + internal static float[] CreateGaussianSharpenKernel(int size, float weight) { - var kernel = new DenseMatrix(size, 1); + var kernel = new float[size]; float sum = 0; @@ -60,7 +58,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution float x = i - midpoint; float gx = Numerics.Gaussian(x, weight); sum += gx; - kernel[0, i] = gx; + kernel[i] = gx; } // Invert the kernel for sharpening. @@ -70,19 +68,19 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution if (i == midpointRounded) { // Calculate central value - kernel[0, i] = (2F * sum) - kernel[0, i]; + kernel[i] = (2F * sum) - kernel[i]; } else { // invert value - kernel[0, i] = -kernel[0, i]; + kernel[i] = -kernel[i]; } } // Normalize kernel so that the sum of all weights equals 1 for (int i = 0; i < size; i++) { - kernel[0, i] /= sum; + kernel[i] /= sum; } return kernel; diff --git a/src/ImageSharp/Processing/Processors/Convolution/GaussianBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/GaussianBlurProcessor{TPixel}.cs index a9b692a01..4ade01f91 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/GaussianBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/GaussianBlurProcessor{TPixel}.cs @@ -27,24 +27,18 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution : base(configuration, source, sourceRectangle) { int kernelSize = (definition.Radius * 2) + 1; - this.KernelX = ConvolutionProcessorHelpers.CreateGaussianBlurKernel(kernelSize, definition.Sigma); - this.KernelY = this.KernelX.Transpose(); + this.Kernel = ConvolutionProcessorHelpers.CreateGaussianBlurKernel(kernelSize, definition.Sigma); } /// - /// Gets the horizontal gradient operator. + /// Gets the 1D convolution kernel. /// - public DenseMatrix KernelX { get; } - - /// - /// Gets the vertical gradient operator. - /// - public DenseMatrix KernelY { get; } + public float[] Kernel { get; } /// protected override void OnFrameApply(ImageFrame source) { - using var processor = new Convolution2PassProcessor(this.Configuration, this.KernelX, this.KernelY, false, this.Source, this.SourceRectangle); + using var processor = new Convolution2PassProcessor(this.Configuration, this.Kernel, false, this.Source, this.SourceRectangle); processor.Apply(source); } diff --git a/src/ImageSharp/Processing/Processors/Convolution/GaussianSharpenProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/GaussianSharpenProcessor{TPixel}.cs index 5e20865e5..73aaaec18 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/GaussianSharpenProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/GaussianSharpenProcessor{TPixel}.cs @@ -27,24 +27,18 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution : base(configuration, source, sourceRectangle) { int kernelSize = (definition.Radius * 2) + 1; - this.KernelX = ConvolutionProcessorHelpers.CreateGaussianSharpenKernel(kernelSize, definition.Sigma); - this.KernelY = this.KernelX.Transpose(); + this.Kernel = ConvolutionProcessorHelpers.CreateGaussianSharpenKernel(kernelSize, definition.Sigma); } /// - /// Gets the horizontal gradient operator. + /// Gets the 1D convolution kernel. /// - public DenseMatrix KernelX { get; } - - /// - /// Gets the vertical gradient operator. - /// - public DenseMatrix KernelY { get; } + public float[] Kernel { get; } /// protected override void OnFrameApply(ImageFrame source) { - using var processor = new Convolution2PassProcessor(this.Configuration, this.KernelX, this.KernelY, false, this.Source, this.SourceRectangle); + using var processor = new Convolution2PassProcessor(this.Configuration, this.Kernel, false, this.Source, this.SourceRectangle); processor.Apply(source); } From 4609fa0b407be7690a296825f05ec1a494438db9 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 19:23:38 +0100 Subject: [PATCH 13/19] Remove leftover ConvolutionRowOperation type --- .../ConvolutionRowOperation{TPixel}.cs | 163 ------------------ 1 file changed, 163 deletions(-) delete mode 100644 src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs diff --git a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs deleted file mode 100644 index 9876b2885..000000000 --- a/src/ImageSharp/Processing/Processors/Convolution/ConvolutionRowOperation{TPixel}.cs +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Advanced; -using SixLabors.ImageSharp.Memory; -using SixLabors.ImageSharp.PixelFormats; - -namespace SixLabors.ImageSharp.Processing.Processors.Convolution -{ - /// - /// A implementing the logic for 1D convolution. - /// - internal readonly struct ConvolutionRowOperation : IRowOperation - where TPixel : unmanaged, IPixel - { - private readonly Rectangle bounds; - private readonly Buffer2D targetPixels; - private readonly Buffer2D sourcePixels; - private readonly KernelSamplingMap map; - private readonly DenseMatrix kernelMatrix; - private readonly Configuration configuration; - private readonly bool preserveAlpha; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public ConvolutionRowOperation( - Rectangle bounds, - Buffer2D targetPixels, - Buffer2D sourcePixels, - KernelSamplingMap map, - DenseMatrix kernelMatrix, - Configuration configuration, - bool preserveAlpha) - { - this.bounds = bounds; - this.targetPixels = targetPixels; - this.sourcePixels = sourcePixels; - this.map = map; - this.kernelMatrix = kernelMatrix; - this.configuration = configuration; - this.preserveAlpha = preserveAlpha; - } - - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void Invoke(int y, Span span) - { - if (this.preserveAlpha) - { - this.Convolve3(y, span); - } - else - { - this.Convolve4(y, span); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void Convolve3(int y, Span span) - { - // Span is 2x bounds. - int boundsX = this.bounds.X; - int boundsWidth = this.bounds.Width; - Span sourceBuffer = span.Slice(0, this.bounds.Width); - Span targetBuffer = span.Slice(this.bounds.Width); - - var state = new ConvolutionState(in this.kernelMatrix, this.map); - ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); - - // Clear the target buffer for each row run. - targetBuffer.Clear(); - ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); - - ReadOnlyKernel kernel = state.Kernel; - Span sourceRow; - for (int kY = 0; kY < kernel.Rows; kY++) - { - // Get the precalculated source sample row for this kernel row and copy to our buffer. - int sampleY = Unsafe.Add(ref sampleRowBase, kY); - sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); - PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); - - ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); - - for (int x = 0; x < sourceBuffer.Length; x++) - { - ref int sampleColumnBase = ref state.GetSampleColumn(x); - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - - for (int kX = 0; kX < kernel.Columns; kX++) - { - int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - target += kernel[kY, kX] * sample; - } - } - } - - // Now we need to copy the original alpha values from the source row. - sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); - PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); - - for (int x = 0; x < sourceRow.Length; x++) - { - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; - } - - Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); - PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void Convolve4(int y, Span span) - { - // Span is 2x bounds. - int boundsX = this.bounds.X; - int boundsWidth = this.bounds.Width; - Span sourceBuffer = span.Slice(0, this.bounds.Width); - Span targetBuffer = span.Slice(this.bounds.Width); - - var state = new ConvolutionState(in this.kernelMatrix, this.map); - ref int sampleRowBase = ref state.GetSampleRow(y - this.bounds.Y); - - // Clear the target buffer for each row run. - targetBuffer.Clear(); - ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); - - ReadOnlyKernel kernel = state.Kernel; - for (int kY = 0; kY < kernel.Rows; kY++) - { - // Get the precalculated source sample row for this kernel row and copy to our buffer. - int sampleY = Unsafe.Add(ref sampleRowBase, kY); - Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); - PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); - - Numerics.Premultiply(sourceBuffer); - ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); - - for (int x = 0; x < sourceBuffer.Length; x++) - { - ref int sampleColumnBase = ref state.GetSampleColumn(x); - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - - for (int kX = 0; kX < kernel.Columns; kX++) - { - int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - target += kernel[kY, kX] * sample; - } - } - } - - Numerics.UnPremultiply(targetBuffer); - - Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); - PixelOperations.Instance.FromVector4Destructive(this.configuration, targetBuffer, targetRow); - } - } -} From f88fca750cf3a27cc55f99f48b9ea30197aef4e3 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 19:37:25 +0100 Subject: [PATCH 14/19] Minor code tweaks --- .../Convolution/Convolution2PassProcessor{TPixel}.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index 9b7ed7580..d407e551e 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -155,13 +155,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Clear the target buffer for each row run. targetBuffer.Clear(); - ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); // Get the precalculated source sample row for this kernel row and copy to our buffer. Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); ref float kernelBase = ref this.kernel[0]; ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); @@ -209,7 +209,6 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Clear the target buffer for each row run. targetBuffer.Clear(); - ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); // Get the precalculated source sample row for this kernel row and copy to our buffer. Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); @@ -218,6 +217,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Numerics.Premultiply(sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); ref float kernelBase = ref this.kernel[0]; ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); From ff86b651d28c27135a0ab082ed8722aa44151e7c Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 22:07:12 +0100 Subject: [PATCH 15/19] More performance improvements to 2 pass convolution --- .../Convolution2PassProcessor{TPixel}.cs | 116 +++++++++++------- 1 file changed, 72 insertions(+), 44 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs index d407e551e..365b2e2df 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/Convolution2PassProcessor{TPixel}.cs @@ -161,24 +161,28 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); - ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + ref Vector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer); + ref Vector4 targetEnd = ref Unsafe.Add(ref targetStart, sourceBuffer.Length); ref float kernelBase = ref this.kernel[0]; + ref float kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize); ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); - for (int x = 0; x < sourceBuffer.Length; x++) + while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd)) { - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + ref float kernelStart = ref kernelBase; + ref int sampleColumnStart = ref sampleColumnBase; - for (int kX = 0; kX < kernelSize; kX++) + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) { - int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - float factor = Unsafe.Add(ref kernelBase, kX); + Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX); + + targetStart += kernelStart * sample; - target += factor * sample; + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1); } - // Shift the base column sampling reference by one row + targetStart = ref Unsafe.Add(ref targetStart, 1); sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize); } @@ -186,10 +190,14 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); - for (int x = 0; x < sourceRow.Length; x++) + targetStart = ref MemoryMarshal.GetReference(targetBuffer); + + while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd)) { - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; + targetStart.W = sourceBase.W; + + targetStart = ref Unsafe.Add(ref targetStart, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); } Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); @@ -217,23 +225,28 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution Numerics.Premultiply(sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); - ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); + ref Vector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer); + ref Vector4 targetEnd = ref Unsafe.Add(ref targetStart, sourceBuffer.Length); ref float kernelBase = ref this.kernel[0]; + ref float kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize); ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); - for (int x = 0; x < sourceBuffer.Length; x++) + while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd)) { - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); + ref float kernelStart = ref kernelBase; + ref int sampleColumnStart = ref sampleColumnBase; - for (int kX = 0; kX < kernelSize; kX++) + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) { - int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - float factor = Unsafe.Add(ref kernelBase, kX); + Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX); - target += factor * sample; + targetStart += kernelStart * sample; + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1); } + targetStart = ref Unsafe.Add(ref targetStart, 1); sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize); } @@ -307,37 +320,48 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution targetBuffer.Clear(); ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); - ref float kernelBase = ref this.kernel[0]; + ref float kernelStart = ref this.kernel[0]; + ref float kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize); Span sourceRow; - for (int kY = 0; kY < kernelSize; kY++) + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) { // Get the precalculated source sample row for this kernel row and copy to our buffer. - int sampleY = Unsafe.Add(ref sampleRowBase, kY); - sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + sourceRow = this.sourcePixels.GetRowSpan(sampleRowBase).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); - float factor = Unsafe.Add(ref kernelBase, kY); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceBase, sourceBuffer.Length); + ref Vector4 targetStart = ref targetBase; + float factor = kernelStart; - for (int x = 0; x < sourceBuffer.Length; x++) + while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd)) { - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - Vector4 sample = Unsafe.Add(ref sourceBase, x); + targetStart += factor * sourceBase; - target += factor * sample; + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + targetStart = ref Unsafe.Add(ref targetStart, 1); } + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1); } // Now we need to copy the original alpha values from the source row. sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); - - for (int x = 0; x < sourceRow.Length; x++) { - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - target.W = Unsafe.Add(ref MemoryMarshal.GetReference(sourceBuffer), x).W; + ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceBase, sourceBuffer.Length); + + while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd)) + { + targetBase.W = sourceBase.W; + + targetBase = ref Unsafe.Add(ref targetBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } } Span targetRow = this.targetPixels.GetRowSpan(y).Slice(boundsX, boundsWidth); @@ -361,28 +385,32 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution targetBuffer.Clear(); ref Vector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); - ref float kernelBase = ref this.kernel[0]; + ref float kernelStart = ref this.kernel[0]; + ref float kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize); - for (int kY = 0; kY < kernelSize; kY++) + Span sourceRow; + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) { // Get the precalculated source sample row for this kernel row and copy to our buffer. - int sampleY = Unsafe.Add(ref sampleRowBase, kY); - Span sourceRow = this.sourcePixels.GetRowSpan(sampleY).Slice(boundsX, boundsWidth); + sourceRow = this.sourcePixels.GetRowSpan(sampleRowBase).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, sourceBuffer); - Numerics.Premultiply(sourceBuffer); - ref Vector4 sourceBase = ref MemoryMarshal.GetReference(sourceBuffer); - float factor = Unsafe.Add(ref kernelBase, kY); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceBase, sourceBuffer.Length); + ref Vector4 targetStart = ref targetBase; + float factor = kernelStart; - for (int x = 0; x < sourceBuffer.Length; x++) + while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd)) { - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - Vector4 sample = Unsafe.Add(ref sourceBase, x); + targetStart += factor * sourceBase; - target += factor * sample; + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + targetStart = ref Unsafe.Add(ref targetStart, 1); } + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1); } Numerics.UnPremultiply(targetBuffer); From a5bbe95e950e48ba450d7cf9ca0f0311c6984c81 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 22:49:32 +0100 Subject: [PATCH 16/19] More codegen improvements to bokeh blur --- .../Convolution/BokehBlurProcessor.cs | 26 ++++++++++++------- .../Convolution/BokehBlurProcessor{TPixel}.cs | 21 +++++++++------ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs index 55cef5df5..13fe627d1 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs @@ -134,23 +134,29 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // The target buffer is zeroed initially and then it accumulates the results // of each partial convolution, so we don't have to clear it here as well ref Vector4 targetBase = ref this.targetValues.GetElementUnsafe(boundsX, y); - ref Complex64 kernelBase = ref this.kernel[0]; + ref Complex64 kernelStart = ref this.kernel[0]; + ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize); - for (int kY = 0; kY < kernelSize; kY++) + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) { // Get the precalculated source sample row for this kernel row and copy to our buffer - int sampleY = Unsafe.Add(ref sampleRowBase, kY); - ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleY); - Complex64 factor = Unsafe.Add(ref kernelBase, kY); + ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleRowBase); + ref ComplexVector4 sourceEnd = ref Unsafe.Add(ref sourceBase, boundsWidth); + ref Vector4 targetStart = ref targetBase; + Complex64 factor = kernelStart; - for (int x = 0; x < boundsWidth; x++) + while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd)) { - ref Vector4 target = ref Unsafe.Add(ref targetBase, x); - ComplexVector4 sample = Unsafe.Add(ref sourceBase, x); - ComplexVector4 partial = factor * sample; + ComplexVector4 partial = factor * sourceBase; - target += partial.WeightedSum(this.z, this.w); + targetStart += partial.WeightedSum(this.z, this.w); + + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + targetStart = ref Unsafe.Add(ref targetStart, 1); } + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1); } } } diff --git a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs index a21155e10..241ff9db2 100644 --- a/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs @@ -233,32 +233,37 @@ namespace SixLabors.ImageSharp.Processing.Processors.Convolution // Clear the target buffer for each row run Span targetBuffer = this.targetValues.GetRowSpan(y); targetBuffer.Clear(); - ref ComplexVector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer); // Execute the bulk pixel format conversion for the current row Span sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth); PixelOperations.Instance.ToVector4(this.configuration, sourceRow, span); ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span); + ref ComplexVector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer); + ref ComplexVector4 targetEnd = ref Unsafe.Add(ref targetStart, span.Length); ref Complex64 kernelBase = ref this.kernel[0]; + ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize); ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan()); - for (int x = 0; x < span.Length; x++) + while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd)) { - ref ComplexVector4 target = ref Unsafe.Add(ref targetBase, x); + ref Complex64 kernelStart = ref kernelBase; + ref int sampleColumnStart = ref sampleColumnBase; - for (int kX = 0; kX < kernelSize; kX++) + while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd)) { - int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX; - Vector4 sample = Unsafe.Add(ref sourceBase, sampleX); - Complex64 factor = Unsafe.Add(ref kernelBase, kX); + Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX); - target.Sum(factor * sample); + targetStart.Sum(kernelStart * sample); + + kernelStart = ref Unsafe.Add(ref kernelStart, 1); + sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1); } // Shift the base column sampling reference by one row at the end of each outer // iteration so that the inner tight loop indexing can skip the multiplication sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize); + targetStart = ref Unsafe.Add(ref targetStart, 1); } } } From aab2837d28ff64f2e9ee92fd0fc49e3f41c5bbd6 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 23:16:26 +0100 Subject: [PATCH 17/19] More codegen improvements to shared methods --- .../ColorSpaces/Companding/SRgbCompanding.cs | 24 ++- src/ImageSharp/Common/Helpers/Numerics.cs | 171 ++++++++++-------- .../Utils/Vector4Converters.Default.cs | 50 ++--- 3 files changed, 138 insertions(+), 107 deletions(-) diff --git a/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs b/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs index 2e212ad19..9a8b5f0a8 100644 --- a/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs +++ b/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System; @@ -25,12 +25,14 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding [MethodImpl(InliningOptions.ShortMethod)] public static void Expand(Span vectors) { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Expand(ref v); + Expand(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } @@ -41,12 +43,14 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding [MethodImpl(InliningOptions.ShortMethod)] public static void Compress(Span vectors) { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Compress(ref v); + Compress(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } @@ -90,4 +94,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding [MethodImpl(InliningOptions.ShortMethod)] public static float Compress(float channel) => channel <= 0.0031308F ? 12.92F * channel : (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F; } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 56ab46c68..99d91168b 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -41,13 +41,11 @@ namespace SixLabors.ImageSharp /// /// Determine the Least Common Multiple (LCM) of two numbers. + /// See https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int LeastCommonMultiple(int a, int b) - { - // https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor - return (a / GreatestCommonDivisor(a, b)) * b; - } + => a / GreatestCommonDivisor(a, b) * b; /// /// Calculates % 2 @@ -290,10 +288,14 @@ namespace SixLabors.ImageSharp if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref byte remainderStart = ref MemoryMarshal.GetReference(remainder); + ref byte remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref byte v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -311,10 +313,14 @@ namespace SixLabors.ImageSharp if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref uint remainderStart = ref MemoryMarshal.GetReference(remainder); + ref uint remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref uint v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -332,10 +338,14 @@ namespace SixLabors.ImageSharp if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref int remainderStart = ref MemoryMarshal.GetReference(remainder); + ref int remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref int v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -353,10 +363,14 @@ namespace SixLabors.ImageSharp if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref float remainderStart = ref MemoryMarshal.GetReference(remainder); + ref float remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref float v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -374,10 +388,14 @@ namespace SixLabors.ImageSharp if (remainder.Length > 0) { - for (int i = 0; i < remainder.Length; i++) + ref double remainderStart = ref MemoryMarshal.GetReference(remainder); + ref double remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length); + + while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd)) { - ref double v = ref remainder[i]; - v = Clamp(v, min, max); + remainderStart = Clamp(remainderStart, min, max); + + remainderStart = ref Unsafe.Add(ref remainderStart, 1); } } } @@ -472,10 +490,8 @@ namespace SixLabors.ImageSharp #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported && vectors.Length >= 2) { - ref Vector256 vectorsBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) @@ -495,12 +511,14 @@ namespace SixLabors.ImageSharp else #endif { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Premultiply(ref v); + Premultiply(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } } @@ -515,10 +533,8 @@ namespace SixLabors.ImageSharp #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported && vectors.Length >= 2) { - ref Vector256 vectorsBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) @@ -538,12 +554,14 @@ namespace SixLabors.ImageSharp else #endif { - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length); - for (int i = 0; i < vectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd)) { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - UnPremultiply(ref v); + UnPremultiply(ref vectorsStart); + + vectorsStart = ref Unsafe.Add(ref vectorsStart, 1); } } } @@ -633,53 +651,54 @@ namespace SixLabors.ImageSharp vectors128Ref = y4; vectors128Ref = ref Unsafe.Add(ref vectors128Ref, 1); } - - return; } + else #endif - ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors); - ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length); - - // Fallback with scalar preprocessing and vectorized approximation steps - while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd)) { - Vector4 v = vectorsRef; - - double - x64 = v.X, - y64 = v.Y, - z64 = v.Z; - float a = v.W; + ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors); + ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length); - ulong - xl = *(ulong*)&x64, - yl = *(ulong*)&y64, - zl = *(ulong*)&z64; - - // Here we use a trick to compute the starting value x0 for the cube root. This is because doing - // pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamme is 3 in this case, - // this means what we actually want is to find the cube root of our clamped values. - // For more info on the constant below, see: - // https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543. - // Here we perform the same trick on all RGB channels separately to help the CPU execute them in paralle, and - // store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit - // register, and use it to accelerate two steps of the Newton approximation using SIMD. - xl = 0x2a9f8a7be393b600 + (xl / 3); - yl = 0x2a9f8a7be393b600 + (yl / 3); - zl = 0x2a9f8a7be393b600 + (zl / 3); - - Vector4 y4; - y4.X = (float)*(double*)&xl; - y4.Y = (float)*(double*)&yl; - y4.Z = (float)*(double*)&zl; - y4.W = 0; - - y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); - y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); - y4.W = a; - - vectorsRef = y4; - vectorsRef = ref Unsafe.Add(ref vectorsRef, 1); + // Fallback with scalar preprocessing and vectorized approximation steps + while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd)) + { + Vector4 v = vectorsRef; + + double + x64 = v.X, + y64 = v.Y, + z64 = v.Z; + float a = v.W; + + ulong + xl = *(ulong*)&x64, + yl = *(ulong*)&y64, + zl = *(ulong*)&z64; + + // Here we use a trick to compute the starting value x0 for the cube root. This is because doing + // pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamme is 3 in this case, + // this means what we actually want is to find the cube root of our clamped values. + // For more info on the constant below, see: + // https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543. + // Here we perform the same trick on all RGB channels separately to help the CPU execute them in paralle, and + // store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit + // register, and use it to accelerate two steps of the Newton approximation using SIMD. + xl = 0x2a9f8a7be393b600 + (xl / 3); + yl = 0x2a9f8a7be393b600 + (yl / 3); + zl = 0x2a9f8a7be393b600 + (zl / 3); + + Vector4 y4; + y4.X = (float)*(double*)&xl; + y4.Y = (float)*(double*)&yl; + y4.Z = (float)*(double*)&zl; + y4.W = 0; + + y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); + y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4))); + y4.W = a; + + vectorsRef = y4; + vectorsRef = ref Unsafe.Add(ref vectorsRef, 1); + } } } } diff --git a/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs b/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs index 999f6325b..6b6ff4319 100644 --- a/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs +++ b/src/ImageSharp/PixelFormats/Utils/Vector4Converters.Default.cs @@ -88,14 +88,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils Span destPixels) where TPixel : unmanaged, IPixel { - ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceStart = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceStart, sourceVectors.Length); ref TPixel destRef = ref MemoryMarshal.GetReference(destPixels); - for (int i = 0; i < sourceVectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i); - ref TPixel dp = ref Unsafe.Add(ref destRef, i); - dp.FromVector4(sp); + destRef.FromVector4(sourceStart); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } @@ -105,14 +107,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils Span destVectors) where TPixel : unmanaged, IPixel { - ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourcePixels); + ref TPixel sourceStart = ref MemoryMarshal.GetReference(sourcePixels); + ref TPixel sourceEnd = ref Unsafe.Add(ref sourceStart, sourcePixels.Length); ref Vector4 destRef = ref MemoryMarshal.GetReference(destVectors); - for (int i = 0; i < sourcePixels.Length; i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref TPixel sp = ref Unsafe.Add(ref sourceRef, i); - ref Vector4 dp = ref Unsafe.Add(ref destRef, i); - dp = sp.ToVector4(); + destRef = sourceStart.ToVector4(); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } @@ -122,14 +126,16 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils Span destinationColors) where TPixel : unmanaged, IPixel { - ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceStart = ref MemoryMarshal.GetReference(sourceVectors); + ref Vector4 sourceEnd = ref Unsafe.Add(ref sourceStart, sourceVectors.Length); ref TPixel destRef = ref MemoryMarshal.GetReference(destinationColors); - for (int i = 0; i < sourceVectors.Length; i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i); - ref TPixel dp = ref Unsafe.Add(ref destRef, i); - dp.FromScaledVector4(sp); + destRef.FromScaledVector4(sourceStart); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } @@ -139,16 +145,18 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils Span destinationVectors) where TPixel : unmanaged, IPixel { - ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourceColors); + ref TPixel sourceStart = ref MemoryMarshal.GetReference(sourceColors); + ref TPixel sourceEnd = ref Unsafe.Add(ref sourceStart, sourceColors.Length); ref Vector4 destRef = ref MemoryMarshal.GetReference(destinationVectors); - for (int i = 0; i < sourceColors.Length; i++) + while (Unsafe.IsAddressLessThan(ref sourceStart, ref sourceEnd)) { - ref TPixel sp = ref Unsafe.Add(ref sourceRef, i); - ref Vector4 dp = ref Unsafe.Add(ref destRef, i); - dp = sp.ToScaledVector4(); + destRef = sourceStart.ToScaledVector4(); + + sourceStart = ref Unsafe.Add(ref sourceStart, 1); + destRef = ref Unsafe.Add(ref destRef, 1); } } } } -} \ No newline at end of file +} From 14d2af4bdd9e6a4de7e92053e3ff179470e16e51 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 15 Dec 2020 23:52:44 +0100 Subject: [PATCH 18/19] Codegen improvements to Numerics.Clamp --- src/ImageSharp/Common/Helpers/Numerics.cs | 27 +++++++++++++++-------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 99d91168b..b9ccfafe0 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -425,7 +425,6 @@ namespace SixLabors.ImageSharp where T : unmanaged { ref T sRef = ref MemoryMarshal.GetReference(span); - ref Vector vsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(span)); var vmin = new Vector(min); var vmax = new Vector(max); @@ -433,25 +432,35 @@ namespace SixLabors.ImageSharp int m = Modulo4(n); int u = n - m; - for (int i = 0; i < u; i += 4) - { - ref Vector vs0 = ref Unsafe.Add(ref vsBase, i); - ref Vector vs1 = ref Unsafe.Add(ref vs0, 1); - ref Vector vs2 = ref Unsafe.Add(ref vs0, 2); - ref Vector vs3 = ref Unsafe.Add(ref vs0, 3); + ref Vector vs0 = ref Unsafe.As>(ref MemoryMarshal.GetReference(span)); + ref Vector vs1 = ref Unsafe.Add(ref vs0, 1); + ref Vector vs2 = ref Unsafe.Add(ref vs0, 2); + ref Vector vs3 = ref Unsafe.Add(ref vs0, 3); + ref Vector vsEnd = ref Unsafe.Add(ref vs0, u); + while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd)) + { vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax); vs1 = Vector.Min(Vector.Max(vmin, vs1), vmax); vs2 = Vector.Min(Vector.Max(vmin, vs2), vmax); vs3 = Vector.Min(Vector.Max(vmin, vs3), vmax); + + vs0 = ref Unsafe.Add(ref vs0, 4); + vs1 = ref Unsafe.Add(ref vs1, 4); + vs2 = ref Unsafe.Add(ref vs2, 4); + vs3 = ref Unsafe.Add(ref vs3, 4); } if (m > 0) { - for (int i = u; i < n; i++) + vs0 = ref vsEnd; + vsEnd = ref Unsafe.Add(ref vsEnd, m); + + while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd)) { - ref Vector vs0 = ref Unsafe.Add(ref vsBase, i); vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax); + + vs0 = ref Unsafe.Add(ref vs0, 1); } } } From fc468344e60eb8791a19dba2c4a91f8ad96ce8d4 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 16 Dec 2020 00:42:33 +0000 Subject: [PATCH 19/19] Fix link + other docs tweaks. --- .github/CONTRIBUTING.md | 2 +- .github/ISSUE_TEMPLATE/config.yml | 2 +- .github/workflows/build-and-test.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 89d1a75f2..0943ab4eb 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -29,7 +29,7 @@ #### **Running tests and Debugging** * Expected test output is pulled in as a submodule from the [ImageSharp.Tests.Images repository](https://github.com/SixLabors/Imagesharp.Tests.Images/tree/master/ReferenceOutput). To succesfully run tests, make sure that you have updated the submodules! -* Debugging (running tests in Debug mode) is only supported on .NET Core 2.1, because of JIT Code Generation bugs like [dotnet/coreclr#16443](https://github.com/dotnet/coreclr/issues/16443) or [dotnet/coreclr#20657](https://github.com/dotnet/coreclr/issues/20657) +* Debugging (running tests in Debug mode) is only supported on .NET Core 2.1+, because of JIT Code Generation bugs like [dotnet/coreclr#16443](https://github.com/dotnet/coreclr/issues/16443) or [dotnet/coreclr#20657](https://github.com/dotnet/coreclr/issues/20657) #### **Do you have questions about consuming the library or the source code?** diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index cf9f78752..1326c72e8 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,7 +1,7 @@ blank_issues_enabled: false contact_links: - name: Ask a Question - url: https://github.com/SixLabors/ImageSharp/discussions?discussions_q=category%3AHelp + url: https://github.com/SixLabors/ImageSharp/discussions?discussions_q=category%3AQ%26A about: Ask a question about this project. - name: Feature Request url: https://github.com/SixLabors/ImageSharp/discussions?discussions_q=category%3AIdeas diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 2876682bc..9e760c4b2 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -75,7 +75,7 @@ jobs: CI: True XUNIT_PATH: .\tests\ImageSharp.Tests # Required for xunit - - name: Store Output Images after failed tests + - name: Export Failed Output uses: actions/upload-artifact@v2 if: failure() with: