From 5cccd6cf3e086c59ea44dbc82d963862da688675 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 20 Jan 2026 15:46:11 +1000 Subject: [PATCH] Use aligned memory for all pooled allocations --- .../Allocators/Internals/ManagedBufferBase.cs | 12 +- .../Allocators/Internals/MemoryUtilities.cs | 123 ++++++++++++++++++ .../RefCountedMemoryLifetimeGuard.cs | 4 +- .../Internals/SharedArrayPoolBuffer{T}.cs | 33 +++-- .../Internals/UnmanagedMemoryHandle.cs | 19 ++- .../MemoryGroupSpanCache.cs | 8 +- .../DiscontiguousBuffers/MemoryGroup{T}.cs | 8 +- 7 files changed, 187 insertions(+), 20 deletions(-) create mode 100644 src/ImageSharp/Memory/Allocators/Internals/MemoryUtilities.cs diff --git a/src/ImageSharp/Memory/Allocators/Internals/ManagedBufferBase.cs b/src/ImageSharp/Memory/Allocators/Internals/ManagedBufferBase.cs index a6ed797d6..6256d5251 100644 --- a/src/ImageSharp/Memory/Allocators/Internals/ManagedBufferBase.cs +++ b/src/ImageSharp/Memory/Allocators/Internals/ManagedBufferBase.cs @@ -16,6 +16,15 @@ internal abstract class ManagedBufferBase : MemoryManager { private GCHandle pinHandle; + /// <summary> + /// Gets the element offset (in element units, not bytes) from the pinned object start to the first element + /// of the span returned by <see cref="GetSpan"/>. + /// </summary> + /// <remarks> + /// This exists to support buffers that return a trimmed/sliced span (e.g. for alignment). + /// </remarks> + protected virtual int GetPinnableElementOffset() => 0; + /// public override unsafe MemoryHandle Pin(int elementIndex = 0) { @@ -24,7 +33,8 @@ internal abstract class ManagedBufferBase : MemoryManager this.pinHandle = GCHandle.Alloc(this.GetPinnableObject(), GCHandleType.Pinned); } - void* ptr = Unsafe.Add((void*)this.pinHandle.AddrOfPinnedObject(), elementIndex); + int baseIndex = this.GetPinnableElementOffset(); + void* ptr = Unsafe.Add((void*)this.pinHandle.AddrOfPinnedObject(), baseIndex + elementIndex); // We should only pass pinnable:this, when GCHandle lifetime is managed by the MemoryManager instance. 
return new MemoryHandle(ptr, pinnable: this); diff --git a/src/ImageSharp/Memory/Allocators/Internals/MemoryUtilities.cs b/src/ImageSharp/Memory/Allocators/Internals/MemoryUtilities.cs new file mode 100644 index 000000000..4a6a4c5e0 --- /dev/null +++ b/src/ImageSharp/Memory/Allocators/Internals/MemoryUtilities.cs @@ -0,0 +1,123 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; + +namespace SixLabors.ImageSharp.Memory.Allocators.Internals; + +/// <summary> +/// Utilities for memory alignment and related operations. +/// </summary> +internal static class MemoryUtilities +{ + /// <summary> + /// Returns the recommended memory alignment, in bytes, for optimal SIMD operations on the current hardware + /// platform. + /// </summary> + /// <remarks> + /// Use this value when allocating memory buffers intended for SIMD processing to help achieve optimal + /// performance. The returned alignment corresponds to the preferred alignment characteristics of the most + /// advanced SIMD instruction set supported by the processor, such as AVX-512, AVX2, SSE2, or ARM64 NEON. + /// </remarks> + /// <returns> + /// A value, in bytes, representing the alignment boundary that should be used for efficient vectorized operations. + /// The value is always a power of two and reflects the largest supported SIMD instruction set available at runtime. + /// </returns> + public static nuint GetAlignment() + { + if (Vector512.IsHardwareAccelerated) + { + return (nuint)Vector512.Count; // 64 + } + + if (Vector256.IsHardwareAccelerated) + { + return (nuint)Vector256.Count; // 32 + } + + if (Vector128.IsHardwareAccelerated) + { + return (nuint)Vector128.Count; // 16 + } + + // Safe fallback. Alignment must be power-of-two. + return 16; + } + + /// <summary> + /// Returns a span of <typeparamref name="T"/> elements over <paramref name="buffer"/>, trimmed so the first element + /// begins at an address aligned to <see cref="GetAlignment"/>. + /// </summary> + /// <param name="buffer">The backing byte array.</param> 
+ /// <param name="length">The number of elements in the returned span.</param> + /// <remarks> + /// Callers must rent/provide <paramref name="buffer"/> with enough slack (alignment - 1 bytes) so the trimmed slice + /// always fits. + /// </remarks> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Span GetAlignedSpan(byte[] buffer, int length) + where T : struct + { + int lengthInBytes = checked(length * Unsafe.SizeOf()); + int offsetBytes = GetAlignedOffsetBytes(buffer); + + return MemoryMarshal.Cast(buffer.AsSpan(offsetBytes, lengthInBytes)); + } + + /// <summary> + /// Computes the byte offset required to align a sliced view of <paramref name="buffer"/> to the + /// alignment returned by <see cref="GetAlignment"/>. + /// </summary> + /// <remarks> + /// This method is intended for use with pooled managed arrays where the exposed span must begin at an + /// aligned address. The returned offset is the number of leading bytes that should be skipped so that + /// the first element of a <see cref="Span{T}"/> begins at an aligned boundary. + /// <para/> + /// This method does not pin the array. If the array may move during use, callers that require a stable + /// aligned address must compute the offset from the pinned base address instead. + /// </remarks> + /// <typeparam name="T"> + /// The element type that the buffer will be reinterpreted as. The computed offset is guaranteed to be + /// compatible with <c>MemoryMarshal.Cast</c> (a whole multiple of <c>sizeof(T)</c>). + /// </typeparam> + /// <param name="buffer">The backing byte array.</param> + /// <returns> + /// The number of bytes to skip from the start of <paramref name="buffer"/> to reach the next aligned + /// element boundary. + /// </returns> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static unsafe int GetAlignedOffsetBytes(byte[] buffer) + where T : struct + { + nuint alignment = GetAlignment(); + int elementSize = Unsafe.SizeOf(); + + // Compute a mask for rounding addresses up to the next alignment boundary. + // Example: alignment = 64 -> mask = 0b0011_1111 + nuint mask = alignment - 1; + + // Obtain the address of the first byte in the array. The array is not pinned here, so the address is only a snapshot. + ref byte r0 = ref MemoryMarshal.GetArrayDataReference(buffer); + nuint baseAddr = (nuint)Unsafe.AsPointer(ref r0); + + // Round the base address up to the next aligned address. 
+ // This is a standard power-of-two alignment operation: + // aligned = (addr + (alignment - 1)) & ~(alignment - 1) + nuint alignedAddr = (baseAddr + mask) & ~mask; + + // Compute the byte offset needed to reach the aligned address. + nuint offset = alignedAddr - baseAddr; + + // Ensure the offset is a multiple of sizeof(T), which is required for + // MemoryMarshal.Cast to be valid. NOTE(review): rounding up here steps past the aligned address, so when sizeof(T) does not divide the offset the result is no longer alignment-aligned and may exceed the (alignment - 1) bytes of slack; confirm callers tolerate this. + nuint rem = offset % (nuint)elementSize; + if (rem != 0) + { + offset += (nuint)elementSize - rem; + } + + return (int)offset; + } +} diff --git a/src/ImageSharp/Memory/Allocators/Internals/RefCountedMemoryLifetimeGuard.cs b/src/ImageSharp/Memory/Allocators/Internals/RefCountedMemoryLifetimeGuard.cs index 4a202a96c..32f8677cf 100644 --- a/src/ImageSharp/Memory/Allocators/Internals/RefCountedMemoryLifetimeGuard.cs +++ b/src/ImageSharp/Memory/Allocators/Internals/RefCountedMemoryLifetimeGuard.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Six Labors Split License. using SixLabors.ImageSharp.Diagnostics; @@ -14,7 +14,7 @@ internal abstract class RefCountedMemoryLifetimeGuard : IDisposable private int refCount = 1; private int disposed; private int released; - private string? allocationStackTrace; + private readonly string? 
allocationStackTrace; protected RefCountedMemoryLifetimeGuard() { diff --git a/src/ImageSharp/Memory/Allocators/Internals/SharedArrayPoolBuffer{T}.cs b/src/ImageSharp/Memory/Allocators/Internals/SharedArrayPoolBuffer{T}.cs index 02bdf0f48..23a00a0c7 100644 --- a/src/ImageSharp/Memory/Allocators/Internals/SharedArrayPoolBuffer{T}.cs +++ b/src/ImageSharp/Memory/Allocators/Internals/SharedArrayPoolBuffer{T}.cs @@ -6,6 +6,7 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Memory.Allocators.Internals; namespace SixLabors.ImageSharp.Memory.Internals; @@ -13,17 +14,30 @@ internal class SharedArrayPoolBuffer : ManagedBufferBase, IRefCounted where T : struct { private readonly int lengthInBytes; + private readonly int alignedOffsetElements; + +#pragma warning disable IDE0044 // Add readonly modifier private LifetimeGuard lifetimeGuard; +#pragma warning restore IDE0044 // Add readonly modifier public SharedArrayPoolBuffer(int lengthInElements) { - this.lengthInBytes = lengthInElements * Unsafe.SizeOf(); - this.Array = ArrayPool.Shared.Rent(this.lengthInBytes); + this.lengthInBytes = checked(lengthInElements * Unsafe.SizeOf()); + nuint alignment = MemoryUtilities.GetAlignment(); + + // Rent slack so we can advance the exposed span start to the next aligned address. NOTE(review): GetAlignedOffsetBytes may round the offset up to a multiple of the element size, which can exceed alignment - 1; confirm the rented length always covers offset + lengthInBytes. + this.Array = ArrayPool.Shared.Rent(checked(this.lengthInBytes + (int)alignment - 1)); + + int offsetBytes = MemoryUtilities.GetAlignedOffsetBytes(this.Array); + this.alignedOffsetElements = offsetBytes / Unsafe.SizeOf(); + this.lifetimeGuard = new LifetimeGuard(this.Array); } public byte[]? 
Array { get; private set; } + public int AlignedOffsetBytes => this.alignedOffsetElements * Unsafe.SizeOf(); + protected override void Dispose(bool disposing) { if (this.Array == null) @@ -38,9 +52,17 @@ internal class SharedArrayPoolBuffer : ManagedBufferBase, IRefCounted public override Span GetSpan() { this.CheckDisposed(); - return MemoryMarshal.Cast(this.Array.AsSpan(0, this.lengthInBytes)); + + // Expose only the aligned slice, never the full rented buffer. + // Use the stored offset so the span base does not depend on recomputing alignment at call time. NOTE(review): the offset was computed from the unpinned array address in the constructor; if the GC relocates the array the slice start may no longer be aligned - confirm. + int offsetBytes = this.AlignedOffsetBytes; + + Span bytes = this.Array.AsSpan(offsetBytes, this.lengthInBytes); + return MemoryMarshal.Cast(bytes); } + protected override int GetPinnableElementOffset() => this.alignedOffsetElements; + protected override object GetPinnableObject() { this.CheckDisposed(); @@ -67,11 +89,6 @@ internal class SharedArrayPoolBuffer : ManagedBufferBase, IRefCounted protected override void Release() { - // If this is called by a finalizer, we will end storing the first array of this bucket - // on the thread local storage of the finalizer thread. - // This is not ideal, but subsequent leaks will end up returning arrays to per-cpu buckets, - // meaning likely a different bucket than it was rented from, - // but this is PROBABLY better than not returning the arrays at all. ArrayPool.Shared.Return(this.array!); this.array = null; } diff --git a/src/ImageSharp/Memory/Allocators/Internals/UnmanagedMemoryHandle.cs b/src/ImageSharp/Memory/Allocators/Internals/UnmanagedMemoryHandle.cs index 632e1bec0..1dc999219 100644 --- a/src/ImageSharp/Memory/Allocators/Internals/UnmanagedMemoryHandle.cs +++ b/src/ImageSharp/Memory/Allocators/Internals/UnmanagedMemoryHandle.cs @@ -2,6 +2,7 @@ // Licensed under the Six Labors Split License. 
using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Memory.Allocators.Internals; namespace SixLabors.ImageSharp.Memory.Internals; @@ -13,6 +14,9 @@ internal struct UnmanagedMemoryHandle : IEquatable // Number of allocation re-attempts when detecting OutOfMemoryException. private const int MaxAllocationAttempts = 10; + // Alignment, in bytes, passed to NativeMemory.AlignedAlloc for every block; read once from MemoryUtilities.GetAlignment(). + private static readonly nuint Alignment = MemoryUtilities.GetAlignment(); + // Track allocations for testing purposes: private static int totalOutstandingHandles; @@ -67,15 +71,16 @@ internal struct UnmanagedMemoryHandle : IEquatable return new UnmanagedMemoryHandle(handle, lengthInBytes); } - private static IntPtr AllocateHandle(int lengthInBytes) + private static unsafe IntPtr AllocateHandle(int lengthInBytes) { int counter = 0; - IntPtr handle = IntPtr.Zero; - while (handle == IntPtr.Zero) + void* ptr = null; + + while (ptr is null) { try { - handle = Marshal.AllocHGlobal(lengthInBytes); + ptr = NativeMemory.AlignedAlloc((nuint)lengthInBytes, Alignment); } catch (OutOfMemoryException) when (counter < MaxAllocationAttempts) { @@ -91,10 +96,10 @@ internal struct UnmanagedMemoryHandle : IEquatable } } - return handle; + return (IntPtr)ptr; } - public void Free() + public unsafe void Free() { IntPtr h = Interlocked.Exchange(ref this.handle, IntPtr.Zero); @@ -103,7 +108,7 @@ internal struct UnmanagedMemoryHandle : IEquatable return; } - Marshal.FreeHGlobal(h); + NativeMemory.AlignedFree((void*)h); Interlocked.Decrement(ref totalOutstandingHandles); if (this.lengthInBytes > 0) { diff --git a/src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroupSpanCache.cs b/src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroupSpanCache.cs index dd3e67c37..271709075 100644 --- a/src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroupSpanCache.cs +++ b/src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroupSpanCache.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. 
// Licensed under the Six Labors Split License. using System.Buffers; @@ -13,7 +13,12 @@ namespace SixLabors.ImageSharp.Memory; internal unsafe struct MemoryGroupSpanCache { public SpanCacheMode Mode; + + // Managed backing: the rented array plus the byte offset of the first exposed element. public byte[]? SingleArray; + public int SingleArrayOffsetBytes; + + // Unmanaged backing: raw pointers. public void* SinglePointer; public void*[] MultiPointer; @@ -28,6 +33,7 @@ internal unsafe struct MemoryGroupSpanCache { memoryGroupSpanCache.Mode = SpanCacheMode.SingleArray; memoryGroupSpanCache.SingleArray = sharedPoolBuffer.Array; + memoryGroupSpanCache.SingleArrayOffsetBytes = sharedPoolBuffer.AlignedOffsetBytes; } else if (owner0 is UnmanagedBuffer unmanagedBuffer) { diff --git a/src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroup{T}.cs b/src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroup{T}.cs index 6dd99fcb0..4687ba60c 100644 --- a/src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroup{T}.cs +++ b/src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroup{T}.cs @@ -251,9 +251,15 @@ internal abstract partial class MemoryGroup : IMemoryGroup, IDisposable { case SpanCacheMode.SingleArray: { - ref byte b0 = ref MemoryMarshal.GetReference(this.memoryGroupSpanCache.SingleArray); + // SingleArray is the raw rented byte[] from SharedArrayPoolBuffer. + // The exposed T-span starts at an aligned byte offset within that array. + ref byte b0 = ref MemoryMarshal.GetArrayDataReference(this.memoryGroupSpanCache.SingleArray!); + + b0 = ref Unsafe.Add(ref b0, this.memoryGroupSpanCache.SingleArrayOffsetBytes); + ref T e0 = ref Unsafe.As(ref b0); e0 = ref Unsafe.Add(ref e0, (uint)(y * width)); + return MemoryMarshal.CreateSpan(ref e0, width); }