Browse Source

Use aligned memory for all pooled allocations

js/aligned-memory-experiments
James Jackson-South 3 weeks ago
parent
commit
5cccd6cf3e
  1. 12
      src/ImageSharp/Memory/Allocators/Internals/ManagedBufferBase.cs
  2. 123
      src/ImageSharp/Memory/Allocators/Internals/MemoryUtilities.cs
  3. 4
      src/ImageSharp/Memory/Allocators/Internals/RefCountedMemoryLifetimeGuard.cs
  4. 33
      src/ImageSharp/Memory/Allocators/Internals/SharedArrayPoolBuffer{T}.cs
  5. 19
      src/ImageSharp/Memory/Allocators/Internals/UnmanagedMemoryHandle.cs
  6. 8
      src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroupSpanCache.cs
  7. 8
      src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroup{T}.cs

12
src/ImageSharp/Memory/Allocators/Internals/ManagedBufferBase.cs

@ -16,6 +16,15 @@ internal abstract class ManagedBufferBase<T> : MemoryManager<T>
{
private GCHandle pinHandle;
/// <summary>
/// Gets the element offset (in <typeparamref name="T"/> units) from the pinned object start to the first element
/// of the span returned by <see cref="MemoryManager{T}.GetSpan"/>.
/// </summary>
/// <remarks>
/// This exists to support buffers that return a trimmed/sliced span (eg, for alignment).
/// </remarks>
protected virtual int GetPinnableElementOffset() => 0;
/// <inheritdoc />
public override unsafe MemoryHandle Pin(int elementIndex = 0)
{
@ -24,7 +33,8 @@ internal abstract class ManagedBufferBase<T> : MemoryManager<T>
this.pinHandle = GCHandle.Alloc(this.GetPinnableObject(), GCHandleType.Pinned);
}
void* ptr = Unsafe.Add<T>((void*)this.pinHandle.AddrOfPinnedObject(), elementIndex);
int baseIndex = this.GetPinnableElementOffset();
void* ptr = Unsafe.Add<T>((void*)this.pinHandle.AddrOfPinnedObject(), baseIndex + elementIndex);
// We should only pass pinnable:this, when GCHandle lifetime is managed by the MemoryManager<T> instance.
return new MemoryHandle(ptr, pinnable: this);

123
src/ImageSharp/Memory/Allocators/Internals/MemoryUtilities.cs

@ -0,0 +1,123 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
namespace SixLabors.ImageSharp.Memory.Allocators.Internals;
/// <summary>
/// Helpers for choosing a SIMD-friendly alignment and for locating an aligned window inside a managed byte array.
/// </summary>
internal static class MemoryUtilities
{
    /// <summary>
    /// Gets the alignment, in bytes, to use for buffers that will be processed with hardware vector instructions.
    /// </summary>
    /// <remarks>
    /// The widest hardware-accelerated vector type decides the value: <see cref="Vector512"/> is checked first,
    /// then <see cref="Vector256"/>, then <see cref="Vector128"/>. When none of them is accelerated a fixed
    /// 16-byte alignment is used.
    /// </remarks>
    /// <returns>
    /// The byte width of the widest accelerated vector type (64, 32 or 16), or 16 as the fallback.
    /// Every possible value is a power of two.
    /// </returns>
    public static nuint GetAlignment()
    {
        // Widest vector first; the trailing literal is the power-of-two fallback.
        int vectorWidthInBytes =
            Vector512.IsHardwareAccelerated ? Vector512<byte>.Count // 64
            : Vector256.IsHardwareAccelerated ? Vector256<byte>.Count // 32
            : Vector128.IsHardwareAccelerated ? Vector128<byte>.Count // 16
            : 16;

        return (nuint)vectorWidthInBytes;
    }

    /// <summary>
    /// Creates a span of <paramref name="length"/> elements that views <paramref name="buffer"/> starting at the
    /// byte offset computed by <see cref="GetAlignedOffsetBytes{T}(byte[])"/>.
    /// </summary>
    /// <typeparam name="T">The element type the bytes are reinterpreted as.</typeparam>
    /// <param name="buffer">The backing byte array.</param>
    /// <param name="length">The number of elements in the returned span.</param>
    /// <returns>A span of <paramref name="length"/> elements over the offset region of <paramref name="buffer"/>.</returns>
    /// <remarks>
    /// The array must be large enough to hold the leading offset plus <paramref name="length"/> elements; callers
    /// are expected to rent it with extra slack (alignment - 1 bytes) for that purpose. The element count is
    /// converted to bytes inside a <c>checked</c> expression, so an overflowing size throws instead of wrapping.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Span<T> GetAlignedSpan<T>(byte[] buffer, int length)
        where T : struct
    {
        int byteCount = checked(length * Unsafe.SizeOf<T>());

        // Slice the raw bytes first, then reinterpret the slice as T.
        Span<byte> window = buffer.AsSpan(GetAlignedOffsetBytes<T>(buffer), byteCount);
        return MemoryMarshal.Cast<byte, T>(window);
    }

    /// <summary>
    /// Calculates how many leading bytes of <paramref name="buffer"/> to skip so that a view over the remainder
    /// starts at the boundary reported by <see cref="GetAlignment"/>.
    /// </summary>
    /// <remarks>
    /// The array's current data address is read without pinning the array. If the array can be relocated
    /// while it is in use, a caller that needs a stable aligned address has to derive the offset from the
    /// pinned base address instead.
    /// </remarks>
    /// <typeparam name="T">
    /// The element type that the buffer will be reinterpreted as. The returned offset is always a whole
    /// multiple of the size of <typeparamref name="T"/>.
    /// </typeparam>
    /// <param name="buffer">The backing byte array.</param>
    /// <returns>
    /// The number of bytes between the start of <paramref name="buffer"/> and the first exposed element.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static unsafe int GetAlignedOffsetBytes<T>(byte[] buffer)
        where T : struct
    {
        nuint boundary = GetAlignment();

        // Address of element zero of the array as it is located right now.
        nuint start = (nuint)Unsafe.AsPointer(ref MemoryMarshal.GetArrayDataReference(buffer));

        // Because the boundary is a power of two, (boundary - 1) masks out how far the address sits
        // past the previous boundary. Example: boundary = 64 -> mask = 0b0011_1111
        nuint pastBoundary = start & (boundary - 1);

        // Distance forward to the next boundary; zero when the address is already aligned.
        nuint skip = pastBoundary == 0 ? 0 : boundary - pastBoundary;

        // Pad the distance up to a whole number of elements.
        // NOTE(review): when padding is added the start moves beyond the aligned address, so the result is
        // only aligned if the padding happens to be a multiple of the boundary, and the result can be larger
        // than (alignment - 1). Confirm that callers renting (alignment - 1) bytes of slack tolerate this.
        nuint stride = (nuint)Unsafe.SizeOf<T>();
        nuint partial = skip % stride;
        if (partial != 0)
        {
            skip += stride - partial;
        }

        return (int)skip;
    }
}

4
src/ImageSharp/Memory/Allocators/Internals/RefCountedMemoryLifetimeGuard.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using SixLabors.ImageSharp.Diagnostics;
@ -14,7 +14,7 @@ internal abstract class RefCountedMemoryLifetimeGuard : IDisposable
private int refCount = 1;
private int disposed;
private int released;
private string? allocationStackTrace;
private readonly string? allocationStackTrace;
protected RefCountedMemoryLifetimeGuard()
{

33
src/ImageSharp/Memory/Allocators/Internals/SharedArrayPoolBuffer{T}.cs

@ -6,6 +6,7 @@ using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory.Allocators.Internals;
namespace SixLabors.ImageSharp.Memory.Internals;
@ -13,17 +14,30 @@ internal class SharedArrayPoolBuffer<T> : ManagedBufferBase<T>, IRefCounted
where T : struct
{
private readonly int lengthInBytes;
private readonly int alignedOffsetElements;
#pragma warning disable IDE0044 // Add readonly modifier
private LifetimeGuard lifetimeGuard;
#pragma warning restore IDE0044 // Add readonly modifier
public SharedArrayPoolBuffer(int lengthInElements)
{
this.lengthInBytes = lengthInElements * Unsafe.SizeOf<T>();
this.Array = ArrayPool<byte>.Shared.Rent(this.lengthInBytes);
this.lengthInBytes = checked(lengthInElements * Unsafe.SizeOf<T>());
nuint alignment = MemoryUtilities.GetAlignment();
// Rent slack so we can advance the exposed span start to the next aligned address.
this.Array = ArrayPool<byte>.Shared.Rent(checked(this.lengthInBytes + (int)alignment - 1));
int offsetBytes = MemoryUtilities.GetAlignedOffsetBytes<T>(this.Array);
this.alignedOffsetElements = offsetBytes / Unsafe.SizeOf<T>();
this.lifetimeGuard = new LifetimeGuard(this.Array);
}
public byte[]? Array { get; private set; }
public int AlignedOffsetBytes => this.alignedOffsetElements * Unsafe.SizeOf<T>();
protected override void Dispose(bool disposing)
{
if (this.Array == null)
@ -38,9 +52,17 @@ internal class SharedArrayPoolBuffer<T> : ManagedBufferBase<T>, IRefCounted
public override Span<T> GetSpan()
{
this.CheckDisposed();
return MemoryMarshal.Cast<byte, T>(this.Array.AsSpan(0, this.lengthInBytes));
// Expose only the aligned slice, never the full rented buffer.
// Use the stored offset so the span base does not depend on recomputing alignment at call time.
int offsetBytes = this.AlignedOffsetBytes;
Span<byte> bytes = this.Array.AsSpan(offsetBytes, this.lengthInBytes);
return MemoryMarshal.Cast<byte, T>(bytes);
}
protected override int GetPinnableElementOffset() => this.alignedOffsetElements;
protected override object GetPinnableObject()
{
this.CheckDisposed();
@ -67,11 +89,6 @@ internal class SharedArrayPoolBuffer<T> : ManagedBufferBase<T>, IRefCounted
protected override void Release()
{
// If this is called by a finalizer, we will end storing the first array of this bucket
// on the thread local storage of the finalizer thread.
// This is not ideal, but subsequent leaks will end up returning arrays to per-cpu buckets,
// meaning likely a different bucket than it was rented from,
// but this is PROBABLY better than not returning the arrays at all.
ArrayPool<byte>.Shared.Return(this.array!);
this.array = null;
}

19
src/ImageSharp/Memory/Allocators/Internals/UnmanagedMemoryHandle.cs

@ -2,6 +2,7 @@
// Licensed under the Six Labors Split License.
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory.Allocators.Internals;
namespace SixLabors.ImageSharp.Memory.Internals;
@ -13,6 +14,9 @@ internal struct UnmanagedMemoryHandle : IEquatable<UnmanagedMemoryHandle>
// Number of allocation re-attempts when detecting OutOfMemoryException.
private const int MaxAllocationAttempts = 10;
// Alignment for allocated memory blocks.
private static readonly nuint Alignment = MemoryUtilities.GetAlignment();
// Track allocations for testing purposes:
private static int totalOutstandingHandles;
@ -67,15 +71,16 @@ internal struct UnmanagedMemoryHandle : IEquatable<UnmanagedMemoryHandle>
return new UnmanagedMemoryHandle(handle, lengthInBytes);
}
private static IntPtr AllocateHandle(int lengthInBytes)
private static unsafe IntPtr AllocateHandle(int lengthInBytes)
{
int counter = 0;
IntPtr handle = IntPtr.Zero;
while (handle == IntPtr.Zero)
void* ptr = null;
while (ptr is null)
{
try
{
handle = Marshal.AllocHGlobal(lengthInBytes);
ptr = NativeMemory.AlignedAlloc((nuint)lengthInBytes, Alignment);
}
catch (OutOfMemoryException) when (counter < MaxAllocationAttempts)
{
@ -91,10 +96,10 @@ internal struct UnmanagedMemoryHandle : IEquatable<UnmanagedMemoryHandle>
}
}
return handle;
return (IntPtr)ptr;
}
public void Free()
public unsafe void Free()
{
IntPtr h = Interlocked.Exchange(ref this.handle, IntPtr.Zero);
@ -103,7 +108,7 @@ internal struct UnmanagedMemoryHandle : IEquatable<UnmanagedMemoryHandle>
return;
}
Marshal.FreeHGlobal(h);
NativeMemory.AlignedFree((void*)h);
Interlocked.Decrement(ref totalOutstandingHandles);
if (this.lengthInBytes > 0)
{

8
src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroupSpanCache.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Buffers;
@ -13,7 +13,12 @@ namespace SixLabors.ImageSharp.Memory;
internal unsafe struct MemoryGroupSpanCache
{
public SpanCacheMode Mode;
// Managed backing
public byte[]? SingleArray;
public int SingleArrayOffsetBytes;
// Unmanaged backing
public void* SinglePointer;
public void*[] MultiPointer;
@ -28,6 +33,7 @@ internal unsafe struct MemoryGroupSpanCache
{
memoryGroupSpanCache.Mode = SpanCacheMode.SingleArray;
memoryGroupSpanCache.SingleArray = sharedPoolBuffer.Array;
memoryGroupSpanCache.SingleArrayOffsetBytes = sharedPoolBuffer.AlignedOffsetBytes;
}
else if (owner0 is UnmanagedBuffer<T> unmanagedBuffer)
{

8
src/ImageSharp/Memory/DiscontiguousBuffers/MemoryGroup{T}.cs

@ -251,9 +251,15 @@ internal abstract partial class MemoryGroup<T> : IMemoryGroup<T>, IDisposable
{
case SpanCacheMode.SingleArray:
{
ref byte b0 = ref MemoryMarshal.GetReference<byte>(this.memoryGroupSpanCache.SingleArray);
// SingleArray is the raw rented byte[] from SharedArrayPoolBuffer.
// The exposed T-span starts at an aligned byte offset within that array.
ref byte b0 = ref MemoryMarshal.GetArrayDataReference(this.memoryGroupSpanCache.SingleArray!);
b0 = ref Unsafe.Add(ref b0, this.memoryGroupSpanCache.SingleArrayOffsetBytes);
ref T e0 = ref Unsafe.As<byte, T>(ref b0);
e0 = ref Unsafe.Add(ref e0, (uint)(y * width));
return MemoryMarshal.CreateSpan(ref e0, width);
}

Loading…
Cancel
Save