Browse Source

Update and simplify quantization color caches.

pull/3107/head
James Jackson-South 1 month ago
parent
commit
90f0c0b5d4
  1. 2
      src/ImageSharp/Advanced/AotCompilerTools.cs
  2. 10
      src/ImageSharp/Processing/Processors/Quantization/ColorMatchingMode.cs
  3. 116
      src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel,TCache}.cs
  4. 387
      src/ImageSharp/Processing/Processors/Quantization/IColorIndexCache.cs
  5. 2
      src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs
  6. 302
      tests/ImageSharp.Benchmarks/Processing/ColorMatchingCaches.cs
  7. 2
      tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs
  8. 158
      tests/ImageSharp.Tests/Processing/Processors/Quantization/PaletteQuantizerTests.cs

2
src/ImageSharp/Advanced/AotCompilerTools.cs

@ -523,10 +523,8 @@ internal static class AotCompilerTools
private static void AotCompilePixelMaps<TPixel>()
where TPixel : unmanaged, IPixel<TPixel>
{
// Each statement below references GetClosestColor on one closed
// EuclideanPixelMap<TPixel, TCache> instantiation, one statement per cache type.
// NOTE(review): EuclideanPixelMap is a sealed class, so default(...) evaluates to null and
// these calls would throw if executed. Presumably this method only exists so that AOT
// compilers see the generic instantiations and is never invoked at runtime — confirm.
default(EuclideanPixelMap<TPixel, HybridCache>).GetClosestColor(default, out _);
default(EuclideanPixelMap<TPixel, AccurateCache>).GetClosestColor(default, out _);
default(EuclideanPixelMap<TPixel, CoarseCache>).GetClosestColor(default, out _);
default(EuclideanPixelMap<TPixel, NullCache>).GetClosestColor(default, out _);
}
/// <summary>

10
src/ImageSharp/Processing/Processors/Quantization/ColorMatchingMode.cs

@ -15,14 +15,8 @@ public enum ColorMatchingMode
Coarse,
/// <summary>
/// Enables an exact color match cache for the first 512 unique colors encountered,
/// falling back to coarse matching thereafter.
/// </summary>
Hybrid,
/// <summary>
/// Performs exact color matching without any caching optimizations.
/// This is the slowest but most accurate matching strategy.
/// Performs exact color matching using a bounded exact-match cache with eviction.
/// This preserves exact color matching while accelerating repeated colors.
/// </summary>
Exact
}

116
src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel,TCache}.cs

@ -3,6 +3,8 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Processing.Processors.Quantization;
@ -71,32 +73,107 @@ internal sealed class EuclideanPixelMap<TPixel, TCache> : PixelMap<TPixel>
[MethodImpl(InliningOptions.ColdPath)]
private int GetClosestColorSlow(Rgba32 rgba, ref TPixel paletteRef, out TPixel match)
{
// Loop through the palette and find the nearest match.
ReadOnlySpan<Rgba32> rgbaPalette = this.rgbaPalette;
ref Rgba32 rgbaPaletteRef = ref MemoryMarshal.GetReference(rgbaPalette);
int index = 0;
float leastDistance = float.MaxValue;
for (int i = 0; i < this.rgbaPalette.Length; i++)
int leastDistance = int.MaxValue;
int i = 0;
if (Vector128.IsHardwareAccelerated && rgbaPalette.Length >= 4)
{
Rgba32 candidate = this.rgbaPalette[i];
if (candidate.PackedValue == rgba.PackedValue)
{
index = i;
break;
}
// Duplicate the query color so one 128-bit register can be subtracted from
// two packed RGBA candidates at a time after widening.
Vector128<short> pixel = Vector128.Create(
rgba.R,
rgba.G,
rgba.B,
rgba.A,
rgba.R,
rgba.G,
rgba.B,
rgba.A);
float distance = DistanceSquared(rgba, candidate);
if (distance == 0)
int vectorizedLength = rgbaPalette.Length & ~0x03;
for (; i < vectorizedLength; i += 4)
{
index = i;
break;
// Load four packed Rgba32 values (16 bytes) and widen them into two vectors:
// [c0.r, c0.g, c0.b, c0.a, c1.r, ...] and [c2.r, c2.g, c2.b, c2.a, c3.r, ...].
Vector128<byte> packed = Vector128.LoadUnsafe(ref Unsafe.As<Rgba32, byte>(ref Unsafe.Add(ref rgbaPaletteRef, i)));
Vector128<short> lowerDiff = Vector128.WidenLower(packed).AsInt16() - pixel;
Vector128<short> upperDiff = Vector128.WidenUpper(packed).AsInt16() - pixel;
// MultiplyAddAdjacent collapses channel squares into RG + BA partial sums,
// so each pair of int lanes still corresponds to one candidate color.
Vector128<int> lowerPairs = Vector128_.MultiplyAddAdjacent(lowerDiff, lowerDiff);
Vector128<int> upperPairs = Vector128_.MultiplyAddAdjacent(upperDiff, upperDiff);
// Sum the two partials for candidates i and i + 1.
ref int lowerRef = ref Unsafe.As<Vector128<int>, int>(ref lowerPairs);
int distance = lowerRef + Unsafe.Add(ref lowerRef, 1);
if (distance < leastDistance)
{
index = i;
leastDistance = distance;
if (distance == 0)
{
goto Found;
}
}
distance = Unsafe.Add(ref lowerRef, 2) + Unsafe.Add(ref lowerRef, 3);
if (distance < leastDistance)
{
index = i + 1;
leastDistance = distance;
if (distance == 0)
{
goto Found;
}
}
// Sum the two partials for candidates i + 2 and i + 3.
ref int upperRef = ref Unsafe.As<Vector128<int>, int>(ref upperPairs);
distance = upperRef + Unsafe.Add(ref upperRef, 1);
if (distance < leastDistance)
{
index = i + 2;
leastDistance = distance;
if (distance == 0)
{
goto Found;
}
}
distance = Unsafe.Add(ref upperRef, 2) + Unsafe.Add(ref upperRef, 3);
if (distance < leastDistance)
{
index = i + 3;
leastDistance = distance;
if (distance == 0)
{
goto Found;
}
}
}
}
for (; i < rgbaPalette.Length; i++)
{
int distance = DistanceSquared(rgba, Unsafe.Add(ref rgbaPaletteRef, i));
if (distance < leastDistance)
{
index = i;
leastDistance = distance;
if (distance == 0)
{
goto Found;
}
}
}
Found:
// Now I have the index, pop it into the cache for next time
_ = this.cache.TryAdd(rgba, (short)index);
match = Unsafe.Add(ref paletteRef, (uint)index);
@ -111,12 +188,12 @@ internal sealed class EuclideanPixelMap<TPixel, TCache> : PixelMap<TPixel>
/// <param name="b">The second point.</param>
/// <returns>The distance squared.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static float DistanceSquared(Rgba32 a, Rgba32 b)
private static int DistanceSquared(Rgba32 a, Rgba32 b)
{
float deltaR = a.R - b.R;
float deltaG = a.G - b.G;
float deltaB = a.B - b.B;
float deltaA = a.A - b.A;
int deltaR = a.R - b.R;
int deltaG = a.G - b.G;
int deltaB = a.B - b.B;
int deltaA = a.A - b.A;
return (deltaR * deltaR) + (deltaG * deltaG) + (deltaB * deltaB) + (deltaA * deltaA);
}
@ -177,8 +254,7 @@ internal static class PixelMapFactory
ColorMatchingMode colorMatchingMode)
where TPixel : unmanaged, IPixel<TPixel> => colorMatchingMode switch
{
ColorMatchingMode.Hybrid => new EuclideanPixelMap<TPixel, HybridCache>(configuration, palette),
ColorMatchingMode.Exact => new EuclideanPixelMap<TPixel, NullCache>(configuration, palette),
ColorMatchingMode.Exact => new EuclideanPixelMap<TPixel, AccurateCache>(configuration, palette),
_ => new EuclideanPixelMap<TPixel, CoarseCache>(configuration, palette),
};
}

387
src/ImageSharp/Processing/Processors/Quantization/IColorIndexCache.cs

@ -56,147 +56,6 @@ internal interface IColorIndexCache<T> : IColorIndexCache
public static abstract T Create(MemoryAllocator allocator);
}
/// <summary>
/// A hybrid color index cache that layers a small, fixed-capacity exact-match cache
/// (<see cref="AccurateCache"/>, up to <see cref="AccurateCache.Capacity"/> entries) over a
/// coarse, quantized lookup table (<see cref="CoarseCache"/>).
/// </summary>
/// <remarks>
/// Insertions are offered to the exact cache first and fall through to the coarse cache when the
/// exact cache rejects them. Lookups probe the exact cache first and then the coarse cache, so an
/// exact hit always wins over a coarse one.
/// </remarks>
internal unsafe struct HybridCache : IColorIndexCache<HybridCache>
{
    private CoarseCache coarseCache;
    private AccurateCache accurateCache;

    /// <summary>
    /// Initializes a new instance of the <see cref="HybridCache"/> struct.
    /// </summary>
    /// <param name="allocator">The allocator used to create both underlying caches.</param>
    public HybridCache(MemoryAllocator allocator)
    {
        this.accurateCache = AccurateCache.Create(allocator);
        this.coarseCache = CoarseCache.Create(allocator);
    }

    /// <inheritdoc/>
    public static HybridCache Create(MemoryAllocator allocator) => new(allocator);

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public bool TryAdd(Rgba32 color, short index)
    {
        // Prefer the exact cache; only when it declines the entry is the coarse cache used.
        if (this.accurateCache.TryAdd(color, index))
        {
            return true;
        }

        return this.coarseCache.TryAdd(color, index);
    }

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public readonly bool TryGetValue(Rgba32 color, out short value)
    {
        // Exact matches take priority over coarse (quantized) matches.
        if (this.accurateCache.TryGetValue(color, out value))
        {
            return true;
        }

        return this.coarseCache.TryGetValue(color, out value);
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Deliberately not <see langword="readonly"/>: inside a readonly member the nested cache
    /// fields are readonly variables, so invoking a mutating <c>Clear()</c> on them would run
    /// against a defensive copy and any state reset stored in the nested struct itself
    /// (for example an entry count) would be silently discarded.
    /// </remarks>
    public void Clear()
    {
        this.accurateCache.Clear();
        this.coarseCache.Clear();
    }

    /// <inheritdoc/>
    public void Dispose()
    {
        this.accurateCache.Dispose();
        this.coarseCache.Dispose();
    }
}
/// <summary>
/// A direct-mapped coarse cache that resolves palette indices through a single flat lookup table.
/// </summary>
/// <remarks>
/// Red, green and blue are reduced to 5 bits each and alpha to 6 bits, which yields
/// 32 * 32 * 32 * 64 = 2,097,152 bins. Every bin is a 2-byte value, so the table occupies
/// roughly 4 MB. A bin holding a negative value is empty. Reads and writes are a single
/// indexed access into the pinned table.
/// </remarks>
internal unsafe struct CoarseCache : IColorIndexCache<CoarseCache>
{
    private const int IndexRBits = 5;
    private const int IndexGBits = 5;
    private const int IndexBBits = 5;
    private const int IndexABits = 6;

    // Number of bins along each quantized channel axis.
    private const int IndexRCount = 1 << IndexRBits;
    private const int IndexGCount = 1 << IndexGBits;
    private const int IndexBCount = 1 << IndexBBits;
    private const int IndexACount = 1 << IndexABits;

    // 32 * 32 * 32 * 64 = 2,097,152 bins in total.
    private const int TotalBins = IndexRCount * IndexGCount * IndexBCount * IndexACount;

    private readonly IMemoryOwner<short> binsOwner;
    private readonly short* binsPointer;
    private MemoryHandle binsHandle;

    private CoarseCache(MemoryAllocator allocator)
    {
        IMemoryOwner<short> owner = allocator.Allocate<short>(TotalBins);

        // -1 marks a bin as empty.
        Span<short> bins = owner.GetSpan();
        bins.Fill(-1);

        // Pin the table so it can be addressed through a raw pointer.
        MemoryHandle handle = owner.Memory.Pin();

        this.binsOwner = owner;
        this.binsHandle = handle;
        this.binsPointer = (short*)handle.Pointer;
    }

    /// <inheritdoc/>
    public static CoarseCache Create(MemoryAllocator allocator)
    {
        return new CoarseCache(allocator);
    }

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public readonly bool TryAdd(Rgba32 color, short value)
    {
        // Direct-mapped: whatever was stored in the bin before is overwritten.
        int bin = GetCoarseIndex(color);
        this.binsPointer[bin] = value;
        return true;
    }

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public readonly bool TryGetValue(Rgba32 color, out short value)
    {
        short stored = this.binsPointer[GetCoarseIndex(color)];
        value = stored;

        // Any non-negative value is a coarse hit.
        return stored >= 0;
    }

    [MethodImpl(InliningOptions.ShortMethod)]
    private static int GetCoarseIndex(Rgba32 color)
    {
        int r = color.R >> (8 - IndexRBits);
        int g = color.G >> (8 - IndexGBits);
        int b = color.B >> (8 - IndexBBits);
        int a = color.A >> (8 - IndexABits);

        // Horner form of a * (R * G * B) + r * (G * B) + g * B + b.
        int index = a;
        index = (index * IndexRCount) + r;
        index = (index * IndexGCount) + g;
        index = (index * IndexBCount) + b;
        return index;
    }

    /// <inheritdoc/>
    public readonly void Clear()
    {
        Span<short> bins = this.binsOwner.GetSpan();
        bins.Fill(-1);
    }

    /// <inheritdoc/>
    public void Dispose()
    {
        // Release the pin before returning the buffer to its owner.
        this.binsHandle.Dispose();
        this.binsOwner.Dispose();
    }
}
/// <summary>
/// <para>
/// CoarseCache is a fast, low-memory lookup structure for caching palette indices associated with RGBA values,
@ -225,7 +84,7 @@ internal unsafe struct CoarseCache : IColorIndexCache<CoarseCache>
/// making it ideal for applications such as color distance caching in images with a limited palette (up to 256 entries).
/// </para>
/// </summary>
internal unsafe struct CoarseCacheLite : IColorIndexCache<CoarseCacheLite>
internal unsafe struct CoarseCache : IColorIndexCache<CoarseCache>
{
// Use 5 bits per channel for R, G, and B: 32 levels each.
// Total buckets = 32^3 = 32768.
@ -236,7 +95,7 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache<CoarseCacheLite>
private readonly AlphaBucket* buckets;
private MemoryHandle bucketHandle;
private CoarseCacheLite(MemoryAllocator allocator)
private CoarseCache(MemoryAllocator allocator)
{
this.bucketsOwner = allocator.Allocate<AlphaBucket>(BucketCount, AllocationOptions.Clean);
this.bucketHandle = this.bucketsOwner.Memory.Pin();
@ -244,7 +103,7 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache<CoarseCacheLite>
}
/// <inheritdoc/>
public static CoarseCacheLite Create(MemoryAllocator allocator) => new(allocator);
public static CoarseCache Create(MemoryAllocator allocator) => new(allocator);
/// <inheritdoc/>
public readonly bool TryAdd(Rgba32 color, short paletteIndex)
@ -289,14 +148,11 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache<CoarseCacheLite>
}
[MethodImpl(InliningOptions.ShortMethod)]
private static byte QuantizeAlpha(byte a)
// Quantize to 6 bits: shift right by (8 - 6) = 2 bits.
=> (byte)(a >> 2);
private static byte QuantizeAlpha(byte a) => (byte)(a >> 2);
public struct AlphaEntry
{
// Store the alpha value quantized to 6 bits (0..63)
// Store the alpha value quantized to 6 bits (0..63).
public byte QuantizedAlpha;
public short PaletteIndex;
}
@ -312,7 +168,7 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache<CoarseCacheLite>
// 2. However, in practice (based on probability theory and typical image data),
// the number of unique alpha values that actually occur for a given quantized RGB
// bucket is usually very small. If you randomly sample 8 values out of 64,
// the probability that these 4 samples are all unique is high if the distribution
// the probability that these samples are all unique is high if the distribution
// of alpha values is skewed or if only a few alpha values are used.
//
// 3. Statistically, for many real-world images, most RGB buckets will have only a couple
@ -377,51 +233,49 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache<CoarseCacheLite>
}
/// <summary>
/// A fixed-capacity dictionary with exactly 512 entries mapping a <see cref="uint"/> key
/// to a <see cref="short"/> value.
/// A fixed-size exact-match cache that stores packed RGBA keys with 4-way set associativity.
/// </summary>
/// <remarks>
/// The dictionary is implemented using a fixed array of 512 buckets and an entries array
/// of the same size. The bucket for a key is computed as (key &amp; 0x1FF), and collisions are
/// resolved through a linked chain stored in the <see cref="Entry.Next"/> field.
/// The cache holds 512 total entries split across 128 sets. Entries are evicted within a set
/// using round-robin replacement, but cached values are returned only when the full packed RGBA
/// key matches, preserving exact quantization results with predictable memory usage.
/// The overall memory usage is approximately 3 KB (512 four-byte keys, 512 two-byte values and
/// 128 one-byte replacement cursors). Both lookup and insertion operations are,
/// on average, O(1) since the bucket is determined via a simple bitmask and collision chains are
/// typically very short; in the worst-case, the number of iterations is bounded by 256.
/// on average, O(1) since each lookup probes at most four candidate entries within the selected set.
/// This guarantees highly efficient and predictable performance for small, fixed-size color palettes.
/// </remarks>
internal unsafe struct AccurateCache : IColorIndexCache<AccurateCache>
{
// Buckets array: each bucket holds the index (0-based) into the entries array
// of the first entry in the chain, or -1 if empty.
private readonly IMemoryOwner<short> bucketsOwner;
private MemoryHandle bucketsHandle;
private short* buckets;
public const int Capacity = 512;
private const int Ways = 4;
private const int SetCount = Capacity / Ways;
private const int SetMask = SetCount - 1;
// Entries array: stores up to 256 entries.
private readonly IMemoryOwner<Entry> entriesOwner;
private MemoryHandle entriesHandle;
private Entry* entries;
private readonly IMemoryOwner<uint> keysOwner;
private MemoryHandle keysHandle;
private uint* keys;
public const int Capacity = 512;
private readonly IMemoryOwner<ushort> valuesOwner;
private MemoryHandle valuesHandle;
private ushort* values;
private readonly IMemoryOwner<byte> nextVictimOwner;
private MemoryHandle nextVictimHandle;
private byte* nextVictim;
private AccurateCache(MemoryAllocator allocator)
{
this.Count = 0;
// Allocate exactly 512 indexes for buckets.
this.bucketsOwner = allocator.Allocate<short>(Capacity, AllocationOptions.Clean);
Span<short> bucketSpan = this.bucketsOwner.GetSpan();
bucketSpan.Fill(-1);
this.bucketsHandle = this.bucketsOwner.Memory.Pin();
this.buckets = (short*)this.bucketsHandle.Pointer;
// Allocate exactly 512 entries.
this.entriesOwner = allocator.Allocate<Entry>(Capacity, AllocationOptions.Clean);
this.entriesHandle = this.entriesOwner.Memory.Pin();
this.entries = (Entry*)this.entriesHandle.Pointer;
}
this.keysOwner = allocator.Allocate<uint>(Capacity, AllocationOptions.Clean);
this.keysHandle = this.keysOwner.Memory.Pin();
this.keys = (uint*)this.keysHandle.Pointer;
public int Count { get; private set; }
this.valuesOwner = allocator.Allocate<ushort>(Capacity, AllocationOptions.Clean);
this.valuesHandle = this.valuesOwner.Memory.Pin();
this.values = (ushort*)this.valuesHandle.Pointer;
this.nextVictimOwner = allocator.Allocate<byte>(SetCount, AllocationOptions.Clean);
this.nextVictimHandle = this.nextVictimOwner.Memory.Pin();
this.nextVictim = (byte*)this.nextVictimHandle.Pointer;
}
/// <inheritdoc/>
public static AccurateCache Create(MemoryAllocator allocator) => new(allocator);
@ -430,140 +284,113 @@ internal unsafe struct AccurateCache : IColorIndexCache<AccurateCache>
[MethodImpl(InliningOptions.ShortMethod)]
public bool TryAdd(Rgba32 color, short value)
{
if (this.Count == Capacity)
{
return false; // Dictionary is full.
}
uint key = color.PackedValue;
int set = GetSetIndex(key);
int start = set * Ways;
int empty = -1;
uint* keys = this.keys;
ushort* values = this.values;
ushort storedValue = (ushort)(value + 1);
// The key is a 32-bit unsigned integer representing an RGBA color, where the bytes are laid out as R|G|B|A
// (with R in the most significant byte and A in the least significant).
// To compute the bucket index:
// 1. (key >> 16) extracts the top 16 bits, effectively giving us the R and G channels.
// 2. (key >> 8) shifts the key right by 8 bits, bringing R, G, and B into the lower 24 bits (dropping A).
// 3. XORing these two values with the original key mixes bits from all four channels (R, G, B, and A),
// which helps to counteract situations where one or more channels have a limited range.
// 4. Finally, we apply a bitmask of 0x1FF to keep only the lowest 9 bits, ensuring the result is between 0 and 511,
// which corresponds to our fixed bucket count of 512.
int bucket = (int)(((key >> 16) ^ (key >> 8) ^ key) & 0x1FF);
int i = this.buckets[bucket];
// Traverse the collision chain.
Entry* entries = this.entries;
while (i != -1)
for (int i = start; i < start + Ways; i++)
{
Entry e = entries[i];
if (e.Key == key)
ushort candidate = values[i];
if (candidate == 0)
{
// Key already exists; do not overwrite.
return false;
empty = i;
continue;
}
i = e.Next;
if (keys[i] == key)
{
values[i] = storedValue;
return true;
}
}
short index = (short)this.Count;
this.Count++;
int slot = empty >= 0 ? empty : start + this.nextVictim[set];
keys[slot] = key;
values[slot] = storedValue;
// Insert the new entry:
entries[index].Key = key;
entries[index].Value = value;
if (empty < 0)
{
this.nextVictim[set] = (byte)((this.nextVictim[set] + 1) & (Ways - 1));
}
// Link this new entry into the bucket chain.
entries[index].Next = this.buckets[bucket];
this.buckets[bucket] = index;
return true;
}
/// <inheritdoc/>
[MethodImpl(InliningOptions.ShortMethod)]
public bool TryGetValue(Rgba32 color, out short value)
public readonly bool TryGetValue(Rgba32 color, out short value)
{
uint key = color.PackedValue;
int bucket = (int)(((key >> 16) ^ (key >> 8) ^ key) & 0x1FF);
int i = this.buckets[bucket];
int start = GetSetIndex(key) * Ways;
// If the bucket is empty, return immediately.
if (i == -1)
{
value = -1;
return false;
}
uint* keys = this.keys;
ushort* values = this.values;
// Traverse the chain.
Entry* entries = this.entries;
do
for (int i = start; i < start + Ways; i++)
{
Entry e = entries[i];
if (e.Key == key)
ushort candidate = values[i];
if (candidate != 0 && keys[i] == key)
{
value = e.Value;
value = (short)(candidate - 1);
return true;
}
i = e.Next;
}
while (i != -1);
value = -1;
return false;
}
/// <summary>
/// Clears the dictionary.
/// Clears the cache.
/// </summary>
public void Clear()
public readonly void Clear()
{
Span<short> bucketSpan = this.bucketsOwner.GetSpan();
bucketSpan.Fill(-1);
this.Count = 0;
this.valuesOwner.GetSpan().Clear();
this.nextVictimOwner.GetSpan().Clear();
}
public void Dispose()
{
this.bucketsHandle.Dispose();
this.bucketsOwner.Dispose();
this.entriesHandle.Dispose();
this.entriesOwner.Dispose();
this.buckets = null;
this.entries = null;
this.keysHandle.Dispose();
this.keysOwner.Dispose();
this.valuesHandle.Dispose();
this.valuesOwner.Dispose();
this.nextVictimHandle.Dispose();
this.nextVictimOwner.Dispose();
this.keys = null;
this.values = null;
this.nextVictim = null;
}
private struct Entry
{
public uint Key; // The key (packed RGBA)
public short Value; // The value; -1 means unused.
public short Next; // Index of the next entry in the chain, or -1 if none.
}
}
/// <summary>
/// Represents a cache that does not store any values.
/// It allows adding colors, but always returns false when trying to retrieve them.
/// </summary>
internal readonly struct NullCache : IColorIndexCache<NullCache>
{
/// <inheritdoc/>
public static NullCache Create(MemoryAllocator allocator) => default;
/// <inheritdoc/>
public bool TryAdd(Rgba32 color, short value) => true;
/// <inheritdoc/>
public bool TryGetValue(Rgba32 color, out short value)
{
value = -1;
return false;
}
/// <inheritdoc/>
public void Clear()
{
}
/// <inheritdoc/>
public void Dispose()
{
}
/// <summary>
/// Maps a packed RGBA key to one of the cache sets used by <see cref="AccurateCache"/>.
/// </summary>
/// <param name="key">The packed <see cref="Rgba32.PackedValue"/> key.</param>
/// <returns>The zero-based set index for the key.</returns>
/// <remarks>
/// <para>
/// The cache is 4-way set-associative, so this hash only needs to choose one of
/// <see cref="SetCount"/> sets before probing up to four candidate entries.
/// </para>
/// <para>
/// <see cref="Rgba32.PackedValue"/> is laid out as <c>R | (G &lt;&lt; 8) | (B &lt;&lt; 16) | (A &lt;&lt; 24)</c>.
/// The XOR-fold mixes neighboring bytes into the low bits, and the final mask selects the
/// set. With the current 128-set layout that makes the selected set effectively depend on
/// the low 7 bits of <c>R ^ G ^ B</c>. Alpha still participates in the later exact key
/// comparison, but not in set selection.
/// </para>
/// <para>
/// Collisions are expected and acceptable here. Correctness comes from the full packed-key
/// comparison during probing; this hash only aims to spread keys cheaply enough that each
/// access touches at most one 4-entry set.
/// </para>
/// </remarks>
[MethodImpl(InliningOptions.ShortMethod)]
private static int GetSetIndex(uint key)
=> (int)(((key >> 16) ^ (key >> 8) ^ key) & SetMask);
}

2
src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs

@ -368,7 +368,7 @@ public struct OctreeQuantizer<TPixel> : IQuantizer<TPixel>
public void Dispose() => this.nodesOwner.Dispose();
[StructLayout(LayoutKind.Sequential)]
internal unsafe struct OctreeNode
internal struct OctreeNode
{
public bool Leaf;
public int PixelCount;

302
tests/ImageSharp.Benchmarks/Processing/ColorMatchingCaches.cs

@ -0,0 +1,302 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Buffers;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing.Processors.Quantization;
namespace SixLabors.ImageSharp.Benchmarks.Processing;
[Config(typeof(Config.Standard))]
public class ColorMatchingCaches
{
// IterationSetup forces BenchmarkDotNet to use a single benchmark invocation per iteration.
// Repeated lookups can safely replay a smaller working set because that workload is explicitly
// meant to model steady-state cache hits after warmup.
private const int RepeatedLookupCount = 262_144;
// DitherLike should avoid replaying the same stream across multiple passes because that warms
// the caches in a way real high-churn input usually does not. Make the single pass larger instead.
private const int DitherLikeLookupCount = 1_048_576;
private const int RepeatedPassCount = 16;
private Rgba32[] palette;
private Rgba32[] repeatedSeedColors;
private Rgba32[] repeatedLookups;
private Rgba32[] ditherLookups;
private PixelMap<Rgba32> coarse;
private PixelMap<Rgba32> legacyCoarse;
private PixelMap<Rgba32> exact;
private PixelMap<Rgba32> uncached;
[Params(16, 256)]
public int PaletteSize { get; set; }
[Params(CacheWorkload.Repeated, CacheWorkload.DitherLike)]
public CacheWorkload Workload { get; set; }
/// <summary>
/// Builds the palette, both lookup streams and one pixel map per cache implementation.
/// </summary>
[GlobalSetup]
public void Setup()
{
    // Everything below is derived from the palette, so create it first.
    Rgba32[] colors = CreatePalette(this.PaletteSize);
    Rgba32[] seeds = CreateRepeatedSeedColors(colors);

    this.palette = colors;
    this.repeatedSeedColors = seeds;
    this.repeatedLookups = CreateRepeatedLookups(seeds);
    this.ditherLookups = CreateDitherLikeLookups();

    // One map per cache policy, all sharing the same palette.
    this.coarse = CreatePixelMap<CoarseCache>(colors);
    this.legacyCoarse = CreatePixelMap<LegacyCoarseCache>(colors);
    this.exact = CreatePixelMap<AccurateCache>(colors);
    this.uncached = CreatePixelMap<UncachedCache>(colors);
}
/// <summary>
/// Puts every pixel map back into a known cache state before each benchmark iteration.
/// </summary>
[IterationSetup]
public void ResetCaches()
{
    PixelMap<Rgba32>[] maps = { this.coarse, this.legacyCoarse, this.exact, this.uncached };

    // Every iteration must begin from an identical cache state; otherwise the numbers would
    // reflect warm state left behind by the previous iteration instead of the cache policy.
    foreach (PixelMap<Rgba32> map in maps)
    {
        map.Clear(this.palette);
    }

    if (this.Workload != CacheWorkload.Repeated)
    {
        return;
    }

    // The repeated workload models steady-state hits, so warm the caches up front rather than
    // letting the measurement be dominated by the initial fill.
    foreach (PixelMap<Rgba32> map in maps)
    {
        Prime(map, this.repeatedSeedColors);
    }
}
/// <summary>
/// Disposes every pixel map created by <see cref="Setup"/>.
/// </summary>
[GlobalCleanup]
public void Cleanup()
{
    PixelMap<Rgba32>[] maps = { this.coarse, this.legacyCoarse, this.exact, this.uncached };
    foreach (PixelMap<Rgba32> map in maps)
    {
        map.Dispose();
    }
}
/// <summary>Runs the selected workload against the current coarse cache (baseline).</summary>
[Benchmark(Baseline = true, Description = "Coarse")]
public int Coarse()
{
    return this.Run(this.coarse);
}

/// <summary>Runs the selected workload against the preserved direct-mapped coarse cache.</summary>
[Benchmark(Description = "Legacy Coarse")]
public int LegacyCoarse()
{
    return this.Run(this.legacyCoarse);
}

/// <summary>Runs the selected workload against the exact-match cache.</summary>
[Benchmark(Description = "Exact Cached")]
public int Exact()
{
    return this.Run(this.exact);
}

/// <summary>Runs the selected workload with no caching at all.</summary>
[Benchmark(Description = "Exact Uncached")]
public int Uncached()
{
    return this.Run(this.uncached);
}
/// <summary>
/// Selects which lookup stream a benchmark run replays against the pixel maps.
/// </summary>
public enum CacheWorkload
{
/// <summary>
/// A small working set that is intentionally reused after priming to measure hit-heavy behavior.
/// </summary>
Repeated,
/// <summary>
/// A deterministic high-churn stream intended to resemble dithered lookups where exact repeats are rare.
/// </summary>
DitherLike
}
/// <summary>
/// Replays the lookup stream for the current <see cref="Workload"/> against <paramref name="map"/>.
/// </summary>
/// <param name="map">The pixel map under test.</param>
/// <returns>
/// A rolling checksum over every returned palette index, so the lookups cannot be optimized away.
/// </returns>
private int Run(PixelMap<Rgba32> map)
{
    Rgba32[] stream;
    int passes;
    if (this.Workload == CacheWorkload.Repeated)
    {
        // Hit-heavy workload: replaying the same stream is the point, it measures steady state.
        stream = this.repeatedLookups;
        passes = RepeatedPassCount;
    }
    else
    {
        // Churn-heavy workload: one larger pass, because replaying the identical sequence would
        // turn it into an artificially warmed cache benchmark.
        stream = this.ditherLookups;
        passes = 1;
    }

    int hash = 0;
    for (int remaining = passes; remaining > 0; remaining--)
    {
        foreach (Rgba32 color in stream)
        {
            hash = unchecked((hash * 31) + map.GetClosestColor(color, out _));
        }
    }

    return hash;
}
/// <summary>
/// Creates a Euclidean pixel map over <paramref name="palette"/> backed by the given cache type.
/// </summary>
/// <typeparam name="TCache">The color index cache implementation to benchmark.</typeparam>
/// <param name="palette">The palette colors to match against.</param>
/// <returns>The new pixel map.</returns>
private static PixelMap<Rgba32> CreatePixelMap<TCache>(Rgba32[] palette)
    where TCache : struct, IColorIndexCache<TCache>
{
    EuclideanPixelMap<Rgba32, TCache> map = new(Configuration.Default, palette);
    return map;
}
/// <summary>
/// Looks up every color once so that the map's cache already holds them when measurement starts.
/// </summary>
/// <param name="map">The pixel map whose cache should be warmed.</param>
/// <param name="colors">The colors to look up.</param>
private static void Prime(PixelMap<Rgba32> map, Rgba32[] colors)
{
    foreach (Rgba32 color in colors)
    {
        // Only the caching side effect matters here; the result is discarded.
        _ = map.GetClosestColor(color, out _);
    }
}
/// <summary>
/// Creates a deterministic palette whose colors are scattered across RGBA space.
/// </summary>
/// <param name="count">The number of palette entries to create.</param>
/// <returns>The generated palette.</returns>
private static Rgba32[] CreatePalette(int count)
{
    // Knuth/golden-ratio multiplicative hash constant. Multiplying successive integers by it
    // spreads the colors out instead of producing a gradient, so no cache is accidentally
    // favored by an overly regular palette.
    const uint Multiplier = 2654435761U;

    Rgba32[] colors = new Rgba32[count];
    for (int slot = 0; slot < colors.Length; slot++)
    {
        uint mixed = unchecked((uint)(slot + 1) * Multiplier);

        byte r = (byte)mixed;
        byte g = (byte)(mixed >> 8);
        byte b = (byte)(mixed >> 16);

        // Setting the top bit keeps alpha at 128 or above.
        byte a = (byte)((mixed >> 24) | 0x80);

        colors[slot] = new Rgba32(r, g, b, a);
    }

    return colors;
}
/// <summary>
/// Creates the small working set of colors that the repeated workload cycles through.
/// </summary>
/// <param name="palette">The palette the seed colors are derived from.</param>
/// <returns>
/// Up to 64 colors (at most twice the palette length), each a palette color with its red, green
/// and blue channels nudged upwards by 0 to 7 levels; alpha is copied unchanged.
/// </returns>
private static Rgba32[] CreateRepeatedSeedColors(Rgba32[] palette)
{
    // Reuse colors derived from the palette but perturb them slightly so the workload still
    // exercises nearest-color matching rather than only exact palette-entry hits.
    int count = Math.Min(64, palette.Length * 2);
    Rgba32[] result = new Rgba32[count];
    for (int i = 0; i < result.Length; i++)
    {
        Rgba32 source = palette[(i * 17) % palette.Length];
        result[i] = new(
            Nudge(source.R, (i * 3) & 0x07),
            Nudge(source.G, (i * 5) & 0x07),
            Nudge(source.B, (i * 7) & 0x07),
            source.A);
    }

    return result;

    // Saturate at 255 instead of letting the byte cast wrap: a wrapped channel (e.g. 253 + 7 -> 4)
    // would be a large jump rather than the slight perturbation this method is meant to apply.
    static byte Nudge(byte channel, int offset)
        => (byte)Math.Min(byte.MaxValue, channel + offset);
}
/// <summary>
/// Expands the seed colors into a stream of <see cref="RepeatedLookupCount"/> lookups.
/// </summary>
/// <param name="seedColors">The colors to cycle through.</param>
/// <returns>The lookup stream.</returns>
private static Rgba32[] CreateRepeatedLookups(Rgba32[] seedColors)
{
    Rgba32[] stream = new Rgba32[RepeatedLookupCount];

    // Walk the seed set round-robin; once primed, this gives a stable, hit-heavy stream.
    int position = 0;
    while (position < stream.Length)
    {
        int seed = position % seedColors.Length;
        stream[position] = seedColors[seed];
        position++;
    }

    return stream;
}
/// <summary>
/// Creates a deterministic stream of <see cref="DitherLikeLookupCount"/> lookups with high churn.
/// </summary>
/// <returns>The lookup stream.</returns>
private static Rgba32[] CreateDitherLikeLookups()
{
    Rgba32[] stream = new Rgba32[DitherLikeLookupCount];

    // Treat the stream as a 512-pixel-wide pseudo-image. Each channel follows its own slope in
    // x and y (wrapping at the byte boundary), so neighboring samples are correlated while
    // exact repeats stay uncommon, which is closer to dithered input.
    for (int offset = 0; offset < stream.Length; offset++)
    {
        int column = offset & 511;
        int row = offset >> 9;

        byte r = (byte)((column * 17) + (row * 13));
        byte g = (byte)((column * 29) + (row * 7));
        byte b = (byte)((column * 11) + (row * 23));
        byte a = (byte)(255 - ((column * 3) + (row * 5)));

        stream[offset] = new Rgba32(r, g, b, a);
    }

    return stream;
}
/// <summary>
/// Benchmark-only copy of the original direct-mapped coarse cache, kept for side-by-side comparison.
/// </summary>
/// <remarks>
/// Red, green and blue are quantized to 5 bits each and alpha to 6 bits. The quantized channels
/// address one <see cref="short"/> slot in a flat, pinned table; a negative slot value means empty.
/// </remarks>
private unsafe struct LegacyCoarseCache : IColorIndexCache<LegacyCoarseCache>
{
    private const int IndexRBits = 5;
    private const int IndexGBits = 5;
    private const int IndexBBits = 5;
    private const int IndexABits = 6;

    private const int IndexRCount = 1 << IndexRBits;
    private const int IndexGCount = 1 << IndexGBits;
    private const int IndexBCount = 1 << IndexBBits;
    private const int IndexACount = 1 << IndexABits;

    private const int TotalBins = IndexRCount * IndexGCount * IndexBCount * IndexACount;

    private readonly IMemoryOwner<short> binsOwner;
    private readonly short* binsPointer;
    private MemoryHandle binsHandle;

    private LegacyCoarseCache(MemoryAllocator allocator)
    {
        IMemoryOwner<short> owner = allocator.Allocate<short>(TotalBins);

        // Mark every bin as empty before the table is pinned and exposed through the pointer.
        owner.GetSpan().Fill(-1);
        MemoryHandle handle = owner.Memory.Pin();

        this.binsOwner = owner;
        this.binsHandle = handle;
        this.binsPointer = (short*)handle.Pointer;
    }

    /// <inheritdoc/>
    public static LegacyCoarseCache Create(MemoryAllocator allocator)
    {
        return new LegacyCoarseCache(allocator);
    }

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public readonly bool TryAdd(Rgba32 color, short value)
    {
        // Direct-mapped: the previous occupant of the bin, if any, is simply replaced.
        int bin = GetCoarseIndex(color);
        this.binsPointer[bin] = value;
        return true;
    }

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public readonly bool TryGetValue(Rgba32 color, out short value)
    {
        short stored = this.binsPointer[GetCoarseIndex(color)];
        value = stored;
        return stored >= 0;
    }

    /// <inheritdoc/>
    public readonly void Clear()
    {
        Span<short> bins = this.binsOwner.GetSpan();
        bins.Fill(-1);
    }

    /// <inheritdoc/>
    public void Dispose()
    {
        // Unpin first, then hand the buffer back.
        this.binsHandle.Dispose();
        this.binsOwner.Dispose();
    }

    [MethodImpl(InliningOptions.ShortMethod)]
    private static int GetCoarseIndex(Rgba32 color)
    {
        int r = color.R >> (8 - IndexRBits);
        int g = color.G >> (8 - IndexGBits);
        int b = color.B >> (8 - IndexBBits);
        int a = color.A >> (8 - IndexABits);

        // Horner form of a * (R * G * B) + r * (G * B) + g * B + b.
        int index = a;
        index = (index * IndexRCount) + r;
        index = (index * IndexGCount) + g;
        index = (index * IndexBCount) + b;
        return index;
    }
}
/// <summary>
/// A cache that never stores anything, so every lookup misses. Serves as the uncached
/// baseline in the exact-cache comparison benchmarks.
/// </summary>
private readonly struct UncachedCache : IColorIndexCache<UncachedCache>
{
    /// <summary>Returns the default instance; the allocator is not used.</summary>
    public static UncachedCache Create(MemoryAllocator allocator)
    {
        return default;
    }

    /// <summary>Reports success without recording the entry.</summary>
    public bool TryAdd(Rgba32 color, short value)
    {
        return true;
    }

    /// <summary>Always misses, setting <paramref name="value"/> to -1.</summary>
    public bool TryGetValue(Rgba32 color, out short value)
    {
        value = -1;
        return false;
    }

    /// <summary>No-op: there is no state to reset.</summary>
    public void Clear()
    {
    }

    /// <summary>No-op: there are no resources to release.</summary>
    public void Dispose()
    {
    }
}
}

2
tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs

@ -680,7 +680,7 @@ public partial class PngEncoderTests
PaletteQuantizer quantizer = new(
palette.Select(Color.FromPixel).ToArray(),
new QuantizerOptions { ColorMatchingMode = ColorMatchingMode.Hybrid });
new QuantizerOptions { ColorMatchingMode = ColorMatchingMode.Exact });
using MemoryStream ms = new();
image.Save(ms, new PngEncoder

158
tests/ImageSharp.Tests/Processing/Processors/Quantization/PaletteQuantizerTests.cs

@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.Processing.Processors.Quantization;
@ -75,4 +76,161 @@ public class PaletteQuantizerTests
IQuantizer quantizer = KnownQuantizers.Werner;
Assert.Equal(QuantizerConstants.DefaultDither, quantizer.Options.Dither);
}
[Fact]
public void ExactColorMatchingMatchesUncachedAfterCacheOverflow()
{
    // Both palette entries (and both probes below) differ only in the low three bits of R,
    // so a lookup that compared colors at reduced precision could not tell them apart.
    Rgba32[] palette = [new Rgba32(0, 0, 0), new Rgba32(7, 0, 0)];

    using PixelMap<Rgba32> exact = CreatePixelMap<UncachedCache>(palette);
    using PixelMap<Rgba32> cachedExact = CreatePixelMap<AccurateCache>(palette);

    // Look up one distinct filler color per cache entry before probing; only the cached
    // map is warmed, the uncached map has nothing to fill.
    for (int filler = 0; filler < AccurateCache.Capacity; filler++)
    {
        cachedExact.GetClosestColor(CreateOverflowFillerColor(filler), out _);
    }

    // R = 1 is nearest to the first palette entry, R = 6 to the second.
    Rgba32[] probes = [new Rgba32(1, 0, 0), new Rgba32(6, 0, 0)];
    foreach (Rgba32 probe in probes)
    {
        AssertMatchesUncached(exact, cachedExact, probe);
    }
}
[Fact]
public void ExactColorMatchingMatchesUncachedAcrossManyProbeBinsAfterRepeatedEviction()
{
    Rgba32[] palette = CreateGrayscalePalette(256);

    using PixelMap<Rgba32> exact = CreatePixelMap<UncachedCache>(palette);
    using PixelMap<Rgba32> cachedExact = CreatePixelMap<AccurateCache>(palette);

    // Push twice the cache capacity through the cached map so that, going by the test
    // name, entries are evicted repeatedly before any probe is issued.
    int filler = 0;
    while (filler < AccurateCache.Capacity * 2)
    {
        cachedExact.GetClosestColor(CreateEvictionFillerColor(filler), out _);
        filler++;
    }

    // Each probe shares its top five bits per channel with the filler of the same index
    // but is a different exact color; the cached result must still equal the uncached one.
    int probe = 0;
    while (probe < AccurateCache.Capacity)
    {
        AssertMatchesUncached(exact, cachedExact, CreateEvictionProbeColor(probe));
        probe++;
    }
}
[Fact]
public void ExactColorMatchingMatchesUncachedForDitherStressColorSequence()
{
    Rgba32[] palette = CreateGrayscalePalette(16);

    using Image<Rgba32> source = CreateDitherStressImage();
    using PixelMap<Rgba32> exact = CreatePixelMap<UncachedCache>(palette);
    using PixelMap<Rgba32> cachedExact = CreatePixelMap<AccurateCache>(palette);

    // Walk the image in row-major order and compare the cached and uncached answers
    // for every pixel.
    for (int row = 0; row < source.Height; row++)
    {
        for (int column = 0; column < source.Width; column++)
        {
            Rgba32 pixel = source[column, row];
            AssertMatchesUncached(exact, cachedExact, pixel);
        }
    }
}
// Spread i across R and G: R takes the low eight bits and G takes the remaining bits offset by 16,
// so every i in [0, 65536) yields a different color.
// B is pinned at 32, which keeps every filler distinct from the red-axis probe colors
// (whose G and B are both zero) used by the overflow test.
private static Rgba32 CreateOverflowFillerColor(int i)
{
    byte red = (byte)i;
    byte green = (byte)(16 + (i >> 8));
    return new Rgba32(red, green, 32);
}
// Read i as three packed 5-bit coordinates (bits 0-4 -> R, 5-9 -> G, 10-14 -> B) and widen each
// one to an 8-bit channel by shifting left by 3. The low three bits of every channel are therefore
// zero, so each color sits on the lower edge of the bucket formed by its top five bits, giving the
// test a deterministic way to touch many distinct buckets before probing nearby exact colors.
private static Rgba32 CreateEvictionFillerColor(int i)
{
    static byte Widen(int packed) => (byte)((packed & 31) << 3);

    return new Rgba32(Widen(i), Widen(i >> 5), Widen(i >> 10));
}
// Uses the same packed 5-bit coordinates as CreateEvictionFillerColor, but fills the low three bits
// of every channel with 0b111. The probe therefore has the same top five bits per channel as the
// filler with the same i while being a different exact color, which is the shape that used to
// expose coarse-fallback false hits.
private static Rgba32 CreateEvictionProbeColor(int i)
{
    const int LowBits = 0x07;

    static byte Widen(int packed) => (byte)(((packed & 31) << 3) | LowBits);

    return new Rgba32(Widen(i), Widen(i >> 5), Widen(i >> 10));
}
// Builds a Euclidean pixel map over the given palette using the default configuration and the
// cache type selected by TCache.
private static PixelMap<Rgba32> CreatePixelMap<TCache>(Rgba32[] palette)
    where TCache : struct, IColorIndexCache<TCache>
{
    return new EuclideanPixelMap<Rgba32, TCache>(Configuration.Default, palette);
}
// Looks the color up in both maps (uncached first) and requires the cached map to report the
// same palette index and the same matched color as the uncached one.
private static void AssertMatchesUncached(PixelMap<Rgba32> exact, PixelMap<Rgba32> cachedExact, Rgba32 color)
{
    int expectedIndex = exact.GetClosestColor(color, out Rgba32 expectedColor);
    int actualIndex = cachedExact.GetClosestColor(color, out Rgba32 actualColor);

    Assert.Equal(expectedIndex, actualIndex);
    Assert.Equal(expectedColor, actualColor);
}
// Produces `count` gray levels spread from 0 to 255 using integer division.
// A single-entry palette contains only black, which also avoids dividing by zero.
private static Rgba32[] CreateGrayscalePalette(int count)
{
    Rgba32[] palette = new Rgba32[count];
    int lastIndex = count - 1;

    for (int i = 0; i < palette.Length; i++)
    {
        byte level = lastIndex == 0 ? (byte)0 : (byte)((i * 255) / lastIndex);
        palette[i] = new Rgba32(level, level, level);
    }

    return palette;
}
// Builds a deterministic 192x96 pseudo-image in which every channel is a different linear
// combination of x and y, truncated to a byte. Adjacent pixels remain correlated, as in real
// image content, yet the combined RGB value changes quickly enough that the sequence rarely
// repeats an exact color. That makes it a useful stress input for comparing cached exact
// matching against an uncached baseline under dither-like access.
private static Image<Rgba32> CreateDitherStressImage()
{
    Image<Rgba32> image = new(192, 96);

    for (int row = 0; row < image.Height; row++)
    {
        for (int column = 0; column < image.Width; column++)
        {
            byte red = (byte)((column * 17) + (row * 13));
            byte green = (byte)((column * 29) + (row * 7));
            byte blue = (byte)((column * 11) + (row * 23));
            image[column, row] = new Rgba32(red, green, blue);
        }
    }

    return image;
}
// A cache that never stores anything, so every lookup misses. The tests use it as the
// uncached baseline that cached lookups are compared against.
private readonly struct UncachedCache : IColorIndexCache<UncachedCache>
{
    // Returns the default instance; the allocator is not used.
    public static UncachedCache Create(MemoryAllocator allocator)
    {
        return default;
    }

    // Reports success without recording the entry.
    public bool TryAdd(Rgba32 color, short value)
    {
        return true;
    }

    // Always misses, setting value to -1.
    public bool TryGetValue(Rgba32 color, out short value)
    {
        value = -1;
        return false;
    }

    // No-op: there is no state to reset.
    public void Clear()
    {
    }

    // No-op: there are no resources to release.
    public void Dispose()
    {
    }
}
}

Loading…
Cancel
Save