diff --git a/src/ImageSharp/Advanced/AotCompilerTools.cs b/src/ImageSharp/Advanced/AotCompilerTools.cs index fef49bffd4..2944b58e5f 100644 --- a/src/ImageSharp/Advanced/AotCompilerTools.cs +++ b/src/ImageSharp/Advanced/AotCompilerTools.cs @@ -523,10 +523,8 @@ internal static class AotCompilerTools private static void AotCompilePixelMaps() where TPixel : unmanaged, IPixel { - default(EuclideanPixelMap).GetClosestColor(default, out _); default(EuclideanPixelMap).GetClosestColor(default, out _); default(EuclideanPixelMap).GetClosestColor(default, out _); - default(EuclideanPixelMap).GetClosestColor(default, out _); } /// diff --git a/src/ImageSharp/Processing/Processors/Quantization/ColorMatchingMode.cs b/src/ImageSharp/Processing/Processors/Quantization/ColorMatchingMode.cs index 26fd7d5d76..c520d7c54b 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/ColorMatchingMode.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/ColorMatchingMode.cs @@ -15,14 +15,8 @@ public enum ColorMatchingMode Coarse, /// - /// Enables an exact color match cache for the first 512 unique colors encountered, - /// falling back to coarse matching thereafter. - /// - Hybrid, - - /// - /// Performs exact color matching without any caching optimizations. - /// This is the slowest but most accurate matching strategy. + /// Performs exact color matching using a bounded exact-match cache with eviction. + /// This preserves exact color matching while accelerating repeated colors. 
/// Exact } diff --git a/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel,TCache}.cs b/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel,TCache}.cs index 5b0c7252cb..e2e7206e09 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel,TCache}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel,TCache}.cs @@ -3,6 +3,8 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Processing.Processors.Quantization; @@ -71,32 +73,107 @@ internal sealed class EuclideanPixelMap : PixelMap [MethodImpl(InliningOptions.ColdPath)] private int GetClosestColorSlow(Rgba32 rgba, ref TPixel paletteRef, out TPixel match) { - // Loop through the palette and find the nearest match. + ReadOnlySpan rgbaPalette = this.rgbaPalette; + ref Rgba32 rgbaPaletteRef = ref MemoryMarshal.GetReference(rgbaPalette); int index = 0; - float leastDistance = float.MaxValue; - for (int i = 0; i < this.rgbaPalette.Length; i++) + int leastDistance = int.MaxValue; + int i = 0; + + if (Vector128.IsHardwareAccelerated && rgbaPalette.Length >= 4) { - Rgba32 candidate = this.rgbaPalette[i]; - if (candidate.PackedValue == rgba.PackedValue) - { - index = i; - break; - } + // Duplicate the query color so one 128-bit register can be subtracted from + // two packed RGBA candidates at a time after widening. + Vector128 pixel = Vector128.Create( + rgba.R, + rgba.G, + rgba.B, + rgba.A, + rgba.R, + rgba.G, + rgba.B, + rgba.A); - float distance = DistanceSquared(rgba, candidate); - if (distance == 0) + int vectorizedLength = rgbaPalette.Length & ~0x03; + + for (; i < vectorizedLength; i += 4) { - index = i; - break; + // Load four packed Rgba32 values (16 bytes) and widen them into two vectors: + // [c0.r, c0.g, c0.b, c0.a, c1.r, ...] 
and [c2.r, c2.g, c2.b, c2.a, c3.r, ...]. + Vector128 packed = Vector128.LoadUnsafe(ref Unsafe.As(ref Unsafe.Add(ref rgbaPaletteRef, i))); + Vector128 lowerDiff = Vector128.WidenLower(packed).AsInt16() - pixel; + Vector128 upperDiff = Vector128.WidenUpper(packed).AsInt16() - pixel; + + // MultiplyAddAdjacent collapses channel squares into RG + BA partial sums, + // so each pair of int lanes still corresponds to one candidate color. + Vector128 lowerPairs = Vector128_.MultiplyAddAdjacent(lowerDiff, lowerDiff); + Vector128 upperPairs = Vector128_.MultiplyAddAdjacent(upperDiff, upperDiff); + + // Sum the two partials for candidates i and i + 1. + ref int lowerRef = ref Unsafe.As, int>(ref lowerPairs); + int distance = lowerRef + Unsafe.Add(ref lowerRef, 1); + if (distance < leastDistance) + { + index = i; + leastDistance = distance; + if (distance == 0) + { + goto Found; + } + } + + distance = Unsafe.Add(ref lowerRef, 2) + Unsafe.Add(ref lowerRef, 3); + if (distance < leastDistance) + { + index = i + 1; + leastDistance = distance; + if (distance == 0) + { + goto Found; + } + } + + // Sum the two partials for candidates i + 2 and i + 3. 
+ ref int upperRef = ref Unsafe.As, int>(ref upperPairs); + distance = upperRef + Unsafe.Add(ref upperRef, 1); + if (distance < leastDistance) + { + index = i + 2; + leastDistance = distance; + if (distance == 0) + { + goto Found; + } + } + + distance = Unsafe.Add(ref upperRef, 2) + Unsafe.Add(ref upperRef, 3); + if (distance < leastDistance) + { + index = i + 3; + leastDistance = distance; + if (distance == 0) + { + goto Found; + } + } } + } + for (; i < rgbaPalette.Length; i++) + { + int distance = DistanceSquared(rgba, Unsafe.Add(ref rgbaPaletteRef, i)); if (distance < leastDistance) { index = i; leastDistance = distance; + if (distance == 0) + { + goto Found; + } } } + Found: + // Now I have the index, pop it into the cache for next time _ = this.cache.TryAdd(rgba, (short)index); match = Unsafe.Add(ref paletteRef, (uint)index); @@ -111,12 +188,12 @@ internal sealed class EuclideanPixelMap : PixelMap /// The second point. /// The distance squared. [MethodImpl(InliningOptions.ShortMethod)] - private static float DistanceSquared(Rgba32 a, Rgba32 b) + private static int DistanceSquared(Rgba32 a, Rgba32 b) { - float deltaR = a.R - b.R; - float deltaG = a.G - b.G; - float deltaB = a.B - b.B; - float deltaA = a.A - b.A; + int deltaR = a.R - b.R; + int deltaG = a.G - b.G; + int deltaB = a.B - b.B; + int deltaA = a.A - b.A; return (deltaR * deltaR) + (deltaG * deltaG) + (deltaB * deltaB) + (deltaA * deltaA); } @@ -177,8 +254,7 @@ internal static class PixelMapFactory ColorMatchingMode colorMatchingMode) where TPixel : unmanaged, IPixel => colorMatchingMode switch { - ColorMatchingMode.Hybrid => new EuclideanPixelMap(configuration, palette), - ColorMatchingMode.Exact => new EuclideanPixelMap(configuration, palette), + ColorMatchingMode.Exact => new EuclideanPixelMap(configuration, palette), _ => new EuclideanPixelMap(configuration, palette), }; } diff --git a/src/ImageSharp/Processing/Processors/Quantization/IColorIndexCache.cs 
b/src/ImageSharp/Processing/Processors/Quantization/IColorIndexCache.cs index 32d95137bc..76598e0046 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/IColorIndexCache.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/IColorIndexCache.cs @@ -56,147 +56,6 @@ internal interface IColorIndexCache : IColorIndexCache public static abstract T Create(MemoryAllocator allocator); } -/// -/// A hybrid color distance cache that combines a small, fixed-capacity exact-match dictionary -/// (ExactCache, ~4–5 KB for up to 512 entries) with a coarse lookup table (CoarseCache) for 5,5,5,6 precision. -/// -/// -/// ExactCache provides O(1) lookup for common cases using a simple 256-entry hash-based dictionary, while CoarseCache -/// quantizes RGB channels to 5 bits (yielding 32^3 buckets) and alpha to 6 bits, storing up to 4 alpha entries per bucket -/// (a design chosen based on probability theory to capture most real-world variations) for a total memory footprint of -/// roughly 576 KB. Lookups and insertions are performed in constant time, making the overall design both fast and memory-predictable. 
-/// -internal unsafe struct HybridCache : IColorIndexCache -{ - private CoarseCache coarseCache; - private AccurateCache accurateCache; - - public HybridCache(MemoryAllocator allocator) - { - this.accurateCache = AccurateCache.Create(allocator); - this.coarseCache = CoarseCache.Create(allocator); - } - - /// - public static HybridCache Create(MemoryAllocator allocator) => new(allocator); - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public bool TryAdd(Rgba32 color, short index) - { - if (this.accurateCache.TryAdd(color, index)) - { - return true; - } - - return this.coarseCache.TryAdd(color, index); - } - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public readonly bool TryGetValue(Rgba32 color, out short value) - { - if (this.accurateCache.TryGetValue(color, out value)) - { - return true; - } - - return this.coarseCache.TryGetValue(color, out value); - } - - /// - public readonly void Clear() - { - this.accurateCache.Clear(); - this.coarseCache.Clear(); - } - - /// - public void Dispose() - { - this.accurateCache.Dispose(); - this.coarseCache.Dispose(); - } -} - -/// -/// A coarse cache for color distance lookups that uses a fixed-size lookup table. -/// -/// -/// This cache uses a fixed lookup table with 2,097,152 bins, each storing a 2-byte value, -/// resulting in a memory usage of approximately 4 MB. Lookups and insertions are -/// performed in constant time (O(1)) via direct table indexing. This design is optimized for -/// speed while maintaining a predictable, fixed memory footprint. 
-/// -internal unsafe struct CoarseCache : IColorIndexCache -{ - private const int IndexRBits = 5; - private const int IndexGBits = 5; - private const int IndexBBits = 5; - private const int IndexABits = 6; - private const int IndexRCount = 1 << IndexRBits; // 32 bins for red - private const int IndexGCount = 1 << IndexGBits; // 32 bins for green - private const int IndexBCount = 1 << IndexBBits; // 32 bins for blue - private const int IndexACount = 1 << IndexABits; // 64 bins for alpha - private const int TotalBins = IndexRCount * IndexGCount * IndexBCount * IndexACount; // 2,097,152 bins - - private readonly IMemoryOwner binsOwner; - private readonly short* binsPointer; - private MemoryHandle binsHandle; - - private CoarseCache(MemoryAllocator allocator) - { - this.binsOwner = allocator.Allocate(TotalBins); - this.binsOwner.GetSpan().Fill(-1); - this.binsHandle = this.binsOwner.Memory.Pin(); - this.binsPointer = (short*)this.binsHandle.Pointer; - } - - /// - public static CoarseCache Create(MemoryAllocator allocator) => new(allocator); - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public readonly bool TryAdd(Rgba32 color, short value) - { - this.binsPointer[GetCoarseIndex(color)] = value; - return true; - } - - /// - [MethodImpl(InliningOptions.ShortMethod)] - public readonly bool TryGetValue(Rgba32 color, out short value) - { - value = this.binsPointer[GetCoarseIndex(color)]; - return value > -1; // Coarse match found - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static int GetCoarseIndex(Rgba32 color) - { - int rIndex = color.R >> (8 - IndexRBits); - int gIndex = color.G >> (8 - IndexGBits); - int bIndex = color.B >> (8 - IndexBBits); - int aIndex = color.A >> (8 - IndexABits); - - return (aIndex * IndexRCount * IndexGCount * IndexBCount) + - (rIndex * IndexGCount * IndexBCount) + - (gIndex * IndexBCount) + - bIndex; - } - - /// - public readonly void Clear() - => this.binsOwner.GetSpan().Fill(-1); - - /// - public void Dispose() - { - 
this.binsHandle.Dispose(); - this.binsOwner.Dispose(); - } -} - /// /// /// CoarseCache is a fast, low-memory lookup structure for caching palette indices associated with RGBA values, @@ -225,7 +84,7 @@ internal unsafe struct CoarseCache : IColorIndexCache /// making it ideal for applications such as color distance caching in images with a limited palette (up to 256 entries). /// /// -internal unsafe struct CoarseCacheLite : IColorIndexCache +internal unsafe struct CoarseCache : IColorIndexCache { // Use 5 bits per channel for R, G, and B: 32 levels each. // Total buckets = 32^3 = 32768. @@ -236,7 +95,7 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache private readonly AlphaBucket* buckets; private MemoryHandle bucketHandle; - private CoarseCacheLite(MemoryAllocator allocator) + private CoarseCache(MemoryAllocator allocator) { this.bucketsOwner = allocator.Allocate(BucketCount, AllocationOptions.Clean); this.bucketHandle = this.bucketsOwner.Memory.Pin(); @@ -244,7 +103,7 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache } /// - public static CoarseCacheLite Create(MemoryAllocator allocator) => new(allocator); + public static CoarseCache Create(MemoryAllocator allocator) => new(allocator); /// public readonly bool TryAdd(Rgba32 color, short paletteIndex) @@ -289,14 +148,11 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache } [MethodImpl(InliningOptions.ShortMethod)] - private static byte QuantizeAlpha(byte a) - - // Quantize to 6 bits: shift right by (8 - 6) = 2 bits. - => (byte)(a >> 2); + private static byte QuantizeAlpha(byte a) => (byte)(a >> 2); public struct AlphaEntry { - // Store the alpha value quantized to 6 bits (0..63) + // Store the alpha value quantized to 6 bits (0..63). public byte QuantizedAlpha; public short PaletteIndex; } @@ -312,7 +168,7 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache // 2. 
However, in practice (based on probability theory and typical image data), // the number of unique alpha values that actually occur for a given quantized RGB // bucket is usually very small. If you randomly sample 8 values out of 64, - // the probability that these 4 samples are all unique is high if the distribution + // the probability that these samples are all unique is high if the distribution // of alpha values is skewed or if only a few alpha values are used. // // 3. Statistically, for many real-world images, most RGB buckets will have only a couple @@ -377,51 +233,49 @@ internal unsafe struct CoarseCacheLite : IColorIndexCache } /// -/// A fixed-capacity dictionary with exactly 512 entries mapping a key -/// to a value. +/// A fixed-size exact-match cache that stores packed RGBA keys with 4-way set associativity. /// /// -/// The dictionary is implemented using a fixed array of 512 buckets and an entries array -/// of the same size. The bucket for a key is computed as (key & 0x1FF), and collisions are -/// resolved through a linked chain stored in the field. +/// The cache holds 512 total entries split across 128 sets. Entries are evicted within a set +/// using round-robin replacement, but cached values are returned only when the full packed RGBA +/// key matches, preserving exact quantization results with predictable memory usage. -/// The overall memory usage is approximately 4–5 KB. Both lookup and insertion operations are, +/// The overall memory usage is approximately 3 KB (512 four-byte keys, 512 two-byte values, and 128 one-byte victim counters). Both lookup and insertion operations are, -/// on average, O(1) since the bucket is determined via a simple bitmask and collision chains are -/// typically very short; in the worst-case, the number of iterations is bounded by 256. +/// on average, O(1) since each lookup probes at most four candidate entries within the selected set. /// This guarantees highly efficient and predictable performance for small, fixed-size color palettes. 
/// internal unsafe struct AccurateCache : IColorIndexCache { - // Buckets array: each bucket holds the index (0-based) into the entries array - // of the first entry in the chain, or -1 if empty. - private readonly IMemoryOwner bucketsOwner; - private MemoryHandle bucketsHandle; - private short* buckets; + public const int Capacity = 512; + private const int Ways = 4; + private const int SetCount = Capacity / Ways; + private const int SetMask = SetCount - 1; - // Entries array: stores up to 256 entries. - private readonly IMemoryOwner entriesOwner; - private MemoryHandle entriesHandle; - private Entry* entries; + private readonly IMemoryOwner keysOwner; + private MemoryHandle keysHandle; + private uint* keys; - public const int Capacity = 512; + private readonly IMemoryOwner valuesOwner; + private MemoryHandle valuesHandle; + private ushort* values; + + private readonly IMemoryOwner nextVictimOwner; + private MemoryHandle nextVictimHandle; + private byte* nextVictim; private AccurateCache(MemoryAllocator allocator) { - this.Count = 0; - - // Allocate exactly 512 indexes for buckets. - this.bucketsOwner = allocator.Allocate(Capacity, AllocationOptions.Clean); - Span bucketSpan = this.bucketsOwner.GetSpan(); - bucketSpan.Fill(-1); - this.bucketsHandle = this.bucketsOwner.Memory.Pin(); - this.buckets = (short*)this.bucketsHandle.Pointer; - - // Allocate exactly 512 entries. 
- this.entriesOwner = allocator.Allocate(Capacity, AllocationOptions.Clean); - this.entriesHandle = this.entriesOwner.Memory.Pin(); - this.entries = (Entry*)this.entriesHandle.Pointer; - } + this.keysOwner = allocator.Allocate(Capacity, AllocationOptions.Clean); + this.keysHandle = this.keysOwner.Memory.Pin(); + this.keys = (uint*)this.keysHandle.Pointer; - public int Count { get; private set; } + this.valuesOwner = allocator.Allocate(Capacity, AllocationOptions.Clean); + this.valuesHandle = this.valuesOwner.Memory.Pin(); + this.values = (ushort*)this.valuesHandle.Pointer; + + this.nextVictimOwner = allocator.Allocate(SetCount, AllocationOptions.Clean); + this.nextVictimHandle = this.nextVictimOwner.Memory.Pin(); + this.nextVictim = (byte*)this.nextVictimHandle.Pointer; + } /// public static AccurateCache Create(MemoryAllocator allocator) => new(allocator); @@ -430,140 +284,113 @@ internal unsafe struct AccurateCache : IColorIndexCache [MethodImpl(InliningOptions.ShortMethod)] public bool TryAdd(Rgba32 color, short value) { - if (this.Count == Capacity) - { - return false; // Dictionary is full. - } - uint key = color.PackedValue; + int set = GetSetIndex(key); + int start = set * Ways; + int empty = -1; + + uint* keys = this.keys; + ushort* values = this.values; + ushort storedValue = (ushort)(value + 1); - // The key is a 32-bit unsigned integer representing an RGBA color, where the bytes are laid out as R|G|B|A - // (with R in the most significant byte and A in the least significant). - // To compute the bucket index: - // 1. (key >> 16) extracts the top 16 bits, effectively giving us the R and G channels. - // 2. (key >> 8) shifts the key right by 8 bits, bringing R, G, and B into the lower 24 bits (dropping A). - // 3. XORing these two values with the original key mixes bits from all four channels (R, G, B, and A), - // which helps to counteract situations where one or more channels have a limited range. - // 4. 
Finally, we apply a bitmask of 0x1FF to keep only the lowest 9 bits, ensuring the result is between 0 and 511, - // which corresponds to our fixed bucket count of 512. - int bucket = (int)(((key >> 16) ^ (key >> 8) ^ key) & 0x1FF); - int i = this.buckets[bucket]; - - // Traverse the collision chain. - Entry* entries = this.entries; - while (i != -1) + for (int i = start; i < start + Ways; i++) { - Entry e = entries[i]; - if (e.Key == key) + ushort candidate = values[i]; + if (candidate == 0) { - // Key already exists; do not overwrite. - return false; + empty = i; + continue; } - i = e.Next; + if (keys[i] == key) + { + values[i] = storedValue; + return true; + } } - short index = (short)this.Count; - this.Count++; + int slot = empty >= 0 ? empty : start + this.nextVictim[set]; + keys[slot] = key; + values[slot] = storedValue; - // Insert the new entry: - entries[index].Key = key; - entries[index].Value = value; + if (empty < 0) + { + this.nextVictim[set] = (byte)((this.nextVictim[set] + 1) & (Ways - 1)); + } - // Link this new entry into the bucket chain. - entries[index].Next = this.buckets[bucket]; - this.buckets[bucket] = index; return true; } /// [MethodImpl(InliningOptions.ShortMethod)] - public bool TryGetValue(Rgba32 color, out short value) + public readonly bool TryGetValue(Rgba32 color, out short value) { uint key = color.PackedValue; - int bucket = (int)(((key >> 16) ^ (key >> 8) ^ key) & 0x1FF); - int i = this.buckets[bucket]; + int start = GetSetIndex(key) * Ways; - // If the bucket is empty, return immediately. - if (i == -1) - { - value = -1; - return false; - } + uint* keys = this.keys; + ushort* values = this.values; - // Traverse the chain. 
- Entry* entries = this.entries; - do + for (int i = start; i < start + Ways; i++) { - Entry e = entries[i]; - if (e.Key == key) + ushort candidate = values[i]; + if (candidate != 0 && keys[i] == key) { - value = e.Value; + value = (short)(candidate - 1); return true; } - - i = e.Next; } - while (i != -1); value = -1; return false; } /// - /// Clears the dictionary. + /// Clears the cache. /// - public void Clear() + public readonly void Clear() { - Span bucketSpan = this.bucketsOwner.GetSpan(); - bucketSpan.Fill(-1); - this.Count = 0; + this.valuesOwner.GetSpan().Clear(); + this.nextVictimOwner.GetSpan().Clear(); } public void Dispose() { - this.bucketsHandle.Dispose(); - this.bucketsOwner.Dispose(); - this.entriesHandle.Dispose(); - this.entriesOwner.Dispose(); - this.buckets = null; - this.entries = null; + this.keysHandle.Dispose(); + this.keysOwner.Dispose(); + this.valuesHandle.Dispose(); + this.valuesOwner.Dispose(); + this.nextVictimHandle.Dispose(); + this.nextVictimOwner.Dispose(); + this.keys = null; + this.values = null; + this.nextVictim = null; } - private struct Entry - { - public uint Key; // The key (packed RGBA) - public short Value; // The value; -1 means unused. - public short Next; // Index of the next entry in the chain, or -1 if none. - } -} - -/// -/// Represents a cache that does not store any values. -/// It allows adding colors, but always returns false when trying to retrieve them. -/// -internal readonly struct NullCache : IColorIndexCache -{ - /// - public static NullCache Create(MemoryAllocator allocator) => default; - - /// - public bool TryAdd(Rgba32 color, short value) => true; - - /// - public bool TryGetValue(Rgba32 color, out short value) - { - value = -1; - return false; - } - - /// - public void Clear() - { - } - - /// - public void Dispose() - { - } + /// + /// Maps a packed RGBA key to one of the cache sets used by . + /// + /// The packed key. + /// The zero-based set index for the key. 
+ /// + /// + /// The cache is 4-way set-associative, so this hash only needs to choose one of + /// sets before probing up to four candidate entries. + /// + /// + /// is laid out as R | (G << 8) | (B << 16) | (A << 24). + /// The XOR-fold mixes neighboring bytes into the low bits, and the final mask selects the + /// set. With the current 128-set layout that makes the selected set effectively depend on + /// the low 7 bits of R ^ G ^ B. Alpha still participates in the later exact key + /// comparison, but not in set selection. + /// + /// + /// Collisions are expected and acceptable here. Correctness comes from the full packed-key + /// comparison during probing; this hash only aims to spread keys cheaply enough that each + /// access touches at most one 4-entry set. + /// + /// + [MethodImpl(InliningOptions.ShortMethod)] + private static int GetSetIndex(uint key) + => (int)(((key >> 16) ^ (key >> 8) ^ key) & SetMask); } diff --git a/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs b/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs index 07596b68a8..bdf2ba20a8 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/OctreeQuantizer{TPixel}.cs @@ -368,7 +368,7 @@ public struct OctreeQuantizer : IQuantizer public void Dispose() => this.nodesOwner.Dispose(); [StructLayout(LayoutKind.Sequential)] - internal unsafe struct OctreeNode + internal struct OctreeNode { public bool Leaf; public int PixelCount; diff --git a/tests/ImageSharp.Benchmarks/Processing/ColorMatchingCaches.cs b/tests/ImageSharp.Benchmarks/Processing/ColorMatchingCaches.cs new file mode 100644 index 0000000000..dbaf21a8ef --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Processing/ColorMatchingCaches.cs @@ -0,0 +1,302 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. 
+ +using System.Buffers; +using System.Runtime.CompilerServices; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Processing.Processors.Quantization; + +namespace SixLabors.ImageSharp.Benchmarks.Processing; + +[Config(typeof(Config.Standard))] +public class ColorMatchingCaches +{ + // IterationSetup forces BenchmarkDotNet to use a single benchmark invocation per iteration. + // Repeated lookups can safely replay a smaller working set because that workload is explicitly + // meant to model steady-state cache hits after warmup. + private const int RepeatedLookupCount = 262_144; + + // DitherLike should avoid replaying the same stream across multiple passes because that warms + // the caches in a way real high-churn input usually does not. Make the single pass larger instead. + private const int DitherLikeLookupCount = 1_048_576; + private const int RepeatedPassCount = 16; + + private Rgba32[] palette; + private Rgba32[] repeatedSeedColors; + private Rgba32[] repeatedLookups; + private Rgba32[] ditherLookups; + + private PixelMap coarse; + private PixelMap legacyCoarse; + private PixelMap exact; + private PixelMap uncached; + + [Params(16, 256)] + public int PaletteSize { get; set; } + + [Params(CacheWorkload.Repeated, CacheWorkload.DitherLike)] + public CacheWorkload Workload { get; set; } + + [GlobalSetup] + public void Setup() + { + this.palette = CreatePalette(this.PaletteSize); + this.repeatedSeedColors = CreateRepeatedSeedColors(this.palette); + this.repeatedLookups = CreateRepeatedLookups(this.repeatedSeedColors); + this.ditherLookups = CreateDitherLikeLookups(); + + this.coarse = CreatePixelMap(this.palette); + this.legacyCoarse = CreatePixelMap(this.palette); + this.exact = CreatePixelMap(this.palette); + this.uncached = CreatePixelMap(this.palette); + } + + [IterationSetup] + public void ResetCaches() + { + // Each benchmark iteration should start from the same cache 
state so we measure + // the cache policy itself rather than warm state carried over from a previous iteration. + this.coarse.Clear(this.palette); + this.legacyCoarse.Clear(this.palette); + this.exact.Clear(this.palette); + this.uncached.Clear(this.palette); + + if (this.Workload == CacheWorkload.Repeated) + { + // Prime the repeated workload so the benchmark reflects steady-state hit behavior + // instead of mostly measuring the first-wave fill cost. + Prime(this.coarse, this.repeatedSeedColors); + Prime(this.legacyCoarse, this.repeatedSeedColors); + Prime(this.exact, this.repeatedSeedColors); + Prime(this.uncached, this.repeatedSeedColors); + } + } + + [GlobalCleanup] + public void Cleanup() + { + this.coarse.Dispose(); + this.legacyCoarse.Dispose(); + this.exact.Dispose(); + this.uncached.Dispose(); + } + + [Benchmark(Baseline = true, Description = "Coarse")] + public int Coarse() => this.Run(this.coarse); + + [Benchmark(Description = "Legacy Coarse")] + public int LegacyCoarse() => this.Run(this.legacyCoarse); + + [Benchmark(Description = "Exact Cached")] + public int Exact() => this.Run(this.exact); + + [Benchmark(Description = "Exact Uncached")] + public int Uncached() => this.Run(this.uncached); + + public enum CacheWorkload + { + // A small working set that is intentionally reused after priming to measure hit-heavy behavior. + Repeated, + + // A deterministic high-churn stream intended to resemble dithered lookups where exact repeats are rare. + DitherLike + } + + private int Run(PixelMap map) + { + Rgba32[] lookups = this.Workload == CacheWorkload.Repeated ? this.repeatedLookups : this.ditherLookups; + int passCount = this.Workload == CacheWorkload.Repeated ? RepeatedPassCount : 1; + int checksum = 0; + + // Repeated intentionally replays the same lookup stream to measure steady-state hit behavior. 
+ // DitherLike runs as a single larger pass so we do not turn a churn-heavy workload into an + // artificially warmed cache benchmark by replaying the exact same sequence. + for (int pass = 0; pass < passCount; pass++) + { + for (int i = 0; i < lookups.Length; i++) + { + checksum = unchecked((checksum * 31) + map.GetClosestColor(lookups[i], out _)); + } + } + + return checksum; + } + + private static PixelMap CreatePixelMap(Rgba32[] palette) + where TCache : struct, IColorIndexCache + => new EuclideanPixelMap(Configuration.Default, palette); + + private static void Prime(PixelMap map, Rgba32[] colors) + { + for (int i = 0; i < colors.Length; i++) + { + map.GetClosestColor(colors[i], out _); + } + } + + private static Rgba32[] CreatePalette(int count) + { + Rgba32[] result = new Rgba32[count]; + + for (int i = 0; i < result.Length; i++) + { + // Use the Knuth/golden-ratio multiplicative hash constant to spread colors across + // RGBA space without clustering into a gradient. That keeps the benchmark from + // accidentally favoring any cache because the palette itself is too regular. + uint value = unchecked((uint)(i + 1) * 2654435761U); + result[i] = new( + (byte)value, + (byte)(value >> 8), + (byte)(value >> 16), + (byte)((value >> 24) | 0x80)); + } + + return result; + } + + private static Rgba32[] CreateRepeatedSeedColors(Rgba32[] palette) + { + // Reuse colors derived from the palette but perturb them slightly so the workload still + // exercises nearest-color matching rather than only exact palette-entry hits. 
+ int count = Math.Min(64, palette.Length * 2); + Rgba32[] result = new Rgba32[count]; + + for (int i = 0; i < result.Length; i++) + { + Rgba32 source = palette[(i * 17) % palette.Length]; + result[i] = new( + (byte)(source.R + ((i * 3) & 0x07)), + (byte)(source.G + ((i * 5) & 0x07)), + (byte)(source.B + ((i * 7) & 0x07)), + source.A); + } + + return result; + } + + private static Rgba32[] CreateRepeatedLookups(Rgba32[] seedColors) + { + Rgba32[] result = new Rgba32[RepeatedLookupCount]; + + // Cycle a small seed set to produce a stable, hit-heavy stream after priming. + for (int i = 0; i < result.Length; i++) + { + result[i] = seedColors[i % seedColors.Length]; + } + + return result; + } + + private static Rgba32[] CreateDitherLikeLookups() + { + Rgba32[] result = new Rgba32[DitherLikeLookupCount]; + + // Generate a deterministic pseudo-image signal with independent channel slopes so nearby + // samples are correlated but exact repeats are uncommon, which is closer to dithered input. + for (int i = 0; i < result.Length; i++) + { + int x = i & 511; + int y = i >> 9; + + result[i] = new( + (byte)((x * 17) + (y * 13)), + (byte)((x * 29) + (y * 7)), + (byte)((x * 11) + (y * 23)), + (byte)(255 - ((x * 3) + (y * 5)))); + } + + return result; + } + + /// + /// Preserves the original direct-mapped coarse cache implementation for side-by-side benchmarks. 
+ /// + private unsafe struct LegacyCoarseCache : IColorIndexCache + { + private const int IndexRBits = 5; + private const int IndexGBits = 5; + private const int IndexBBits = 5; + private const int IndexABits = 6; + private const int IndexRCount = 1 << IndexRBits; + private const int IndexGCount = 1 << IndexGBits; + private const int IndexBCount = 1 << IndexBBits; + private const int IndexACount = 1 << IndexABits; + private const int TotalBins = IndexRCount * IndexGCount * IndexBCount * IndexACount; + + private readonly IMemoryOwner binsOwner; + private readonly short* binsPointer; + private MemoryHandle binsHandle; + + private LegacyCoarseCache(MemoryAllocator allocator) + { + this.binsOwner = allocator.Allocate(TotalBins); + this.binsOwner.GetSpan().Fill(-1); + this.binsHandle = this.binsOwner.Memory.Pin(); + this.binsPointer = (short*)this.binsHandle.Pointer; + } + + public static LegacyCoarseCache Create(MemoryAllocator allocator) => new(allocator); + + [MethodImpl(InliningOptions.ShortMethod)] + public readonly bool TryAdd(Rgba32 color, short value) + { + this.binsPointer[GetCoarseIndex(color)] = value; + return true; + } + + [MethodImpl(InliningOptions.ShortMethod)] + public readonly bool TryGetValue(Rgba32 color, out short value) + { + value = this.binsPointer[GetCoarseIndex(color)]; + return value > -1; + } + + public readonly void Clear() => this.binsOwner.GetSpan().Fill(-1); + + public void Dispose() + { + this.binsHandle.Dispose(); + this.binsOwner.Dispose(); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static int GetCoarseIndex(Rgba32 color) + { + int rIndex = color.R >> (8 - IndexRBits); + int gIndex = color.G >> (8 - IndexGBits); + int bIndex = color.B >> (8 - IndexBBits); + int aIndex = color.A >> (8 - IndexABits); + + return (aIndex * IndexRCount * IndexGCount * IndexBCount) + + (rIndex * IndexGCount * IndexBCount) + + (gIndex * IndexBCount) + + bIndex; + } + } + + /// + /// Preserves the uncached path for exact-cache comparison 
benchmarks. + /// + private readonly struct UncachedCache : IColorIndexCache + { + public static UncachedCache Create(MemoryAllocator allocator) => default; + + public bool TryAdd(Rgba32 color, short value) => true; + + public bool TryGetValue(Rgba32 color, out short value) + { + value = -1; + return false; + } + + public void Clear() + { + } + + public void Dispose() + { + } + } +} diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index 4ebcbc13b6..eef8d5ba84 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -680,7 +680,7 @@ public partial class PngEncoderTests PaletteQuantizer quantizer = new( palette.Select(Color.FromPixel).ToArray(), - new QuantizerOptions { ColorMatchingMode = ColorMatchingMode.Hybrid }); + new QuantizerOptions { ColorMatchingMode = ColorMatchingMode.Exact }); using MemoryStream ms = new(); image.Save(ms, new PngEncoder diff --git a/tests/ImageSharp.Tests/Processing/Processors/Quantization/PaletteQuantizerTests.cs b/tests/ImageSharp.Tests/Processing/Processors/Quantization/PaletteQuantizerTests.cs index f2a4b079b5..07e9a4b0d6 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Quantization/PaletteQuantizerTests.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Quantization/PaletteQuantizerTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. 
+using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; using SixLabors.ImageSharp.Processing.Processors.Quantization; @@ -75,4 +76,161 @@ public class PaletteQuantizerTests IQuantizer quantizer = KnownQuantizers.Werner; Assert.Equal(QuantizerConstants.DefaultDither, quantizer.Options.Dither); } + + [Fact] + public void ExactColorMatchingMatchesUncachedAfterCacheOverflow() + { + Rgba32[] palette = + [ + new Rgba32(0, 0, 0), + new Rgba32(7, 0, 0) + ]; + + using PixelMap exact = CreatePixelMap(palette); + using PixelMap cachedExact = CreatePixelMap(palette); + + for (int i = 0; i < AccurateCache.Capacity; i++) + { + cachedExact.GetClosestColor(CreateOverflowFillerColor(i), out _); + } + + Rgba32 first = new(1, 0, 0); + Rgba32 second = new(6, 0, 0); + + AssertMatchesUncached(exact, cachedExact, first); + AssertMatchesUncached(exact, cachedExact, second); + } + + [Fact] + public void ExactColorMatchingMatchesUncachedAcrossManyProbeBinsAfterRepeatedEviction() + { + Rgba32[] palette = CreateGrayscalePalette(256); + + using PixelMap exact = CreatePixelMap(palette); + using PixelMap cachedExact = CreatePixelMap(palette); + + for (int i = 0; i < AccurateCache.Capacity * 2; i++) + { + cachedExact.GetClosestColor(CreateEvictionFillerColor(i), out _); + } + + for (int i = 0; i < AccurateCache.Capacity; i++) + { + AssertMatchesUncached(exact, cachedExact, CreateEvictionProbeColor(i)); + } + } + + [Fact] + public void ExactColorMatchingMatchesUncachedForDitherStressColorSequence() + { + Rgba32[] palette = CreateGrayscalePalette(16); + + using Image source = CreateDitherStressImage(); + using PixelMap exact = CreatePixelMap(palette); + using PixelMap cachedExact = CreatePixelMap(palette); + + for (int y = 0; y < source.Height; y++) + { + for (int x = 0; x < source.Width; x++) + { + AssertMatchesUncached(exact, cachedExact, source[x, y]); + } + } + } + + // Split the first 512 integers across R and G so the warmup loop 
// produces 512 distinct exact colors:
// the low 8 bits go into R, and the ninth bit spills into G once R wraps after 255.
// Keeping B fixed and G offset away from zero also avoids accidentally probing the red-axis test colors below.
private static Rgba32 CreateOverflowFillerColor(int i)
{
    const byte Blue = 32;

    byte red = (byte)i;
    byte green = (byte)(16 + (i >> 8));
    return new Rgba32(red, green, Blue);
}

// Reads i as three consecutive 5-bit fields (bits 0-4 -> R, 5-9 -> G, 10-14 -> B) and scales each
// field up by 8, so every channel has its low three bits clear. Each distinct 15-bit value
// therefore yields a distinct color that sits at the bottom of its own 5-bit-per-channel bucket.
private static Rgba32 CreateEvictionFillerColor(int i)
{
    const int BucketMask = 31;

    int redBucket = i & BucketMask;
    int greenBucket = (i >> 5) & BucketMask;
    int blueBucket = (i >> 10) & BucketMask;

    return new Rgba32(
        (byte)(redBucket << 3),
        (byte)(greenBucket << 3),
        (byte)(blueBucket << 3));
}

// Reconstruct the same 5-bit RGB bucket coordinates used by CreateEvictionFillerColor, then set the
// low 3 bits in each channel to 0b111. That keeps the probe inside the same coarse bucket while making
// it a different exact color, which is the shape that used to expose coarse-fallback false hits.
private static Rgba32 CreateEvictionProbeColor(int i)
{
    const int BucketMask = 31;
    const int LowBits = 0x07;

    // Same three 5-bit fields of i as bucket coordinates, with the low three bits of every channel set.
    int red = ((i & BucketMask) << 3) | LowBits;
    int green = (((i >> 5) & BucketMask) << 3) | LowBits;
    int blue = (((i >> 10) & BucketMask) << 3) | LowBits;

    return new Rgba32((byte)red, (byte)green, (byte)blue);
}

// Builds a Euclidean pixel map over the given palette using the default configuration.
// NOTE(review): TCache is constrained below but never declared, so generic type arguments appear
// to be missing from this listing (here and on PixelMap / EuclideanPixelMap / IColorIndexCache) -
// restore them from the repository before compiling.
private static PixelMap CreatePixelMap(Rgba32[] palette)
    where TCache : struct, IColorIndexCache
{
    return new EuclideanPixelMap(Configuration.Default, palette);
}

// Looks the color up in both maps (reference map first) and requires the palette index and the
// matched color to be identical.
private static void AssertMatchesUncached(PixelMap exact, PixelMap cachedExact, Rgba32 color)
{
    int expectedIndex = exact.GetClosestColor(color, out Rgba32 expectedMatch);
    int actualIndex = cachedExact.GetClosestColor(color, out Rgba32 actualMatch);

    Assert.Equal(expectedIndex, actualIndex);
    Assert.Equal(expectedMatch, actualMatch);
}

// Returns count gray entries whose level rises linearly from 0 to 255 (integer division).
// A single-entry palette is just level 0, which also avoids dividing by zero.
private static Rgba32[] CreateGrayscalePalette(int count)
{
    Rgba32[] grays = new Rgba32[count];
    int lastIndex = count - 1;

    for (int i = 0; i < grays.Length; i++)
    {
        byte level = lastIndex == 0 ? (byte)0 : (byte)((i * 255) / lastIndex);
        grays[i] = new Rgba32(level, level, level);
    }

    return grays;
}

// Generate a deterministic pseudo-image where each channel uses a different x/y slope.
// Neighboring pixels stay correlated, like real image content, but the combined RGB values
// churn heavily enough that exact repeats are rare. That makes this a useful stress input
// for verifying cached exact matching against an uncached baseline under dither-like access.
private static Image CreateDitherStressImage()
{
    // NOTE(review): the pixel indexer below suggests this is a generic image of Rgba32 whose type
    // argument was lost in this listing - confirm against the repository.
    const int Width = 192;
    const int Height = 96;

    Image image = new(Width, Height);

    for (int y = 0; y < image.Height; y++)
    {
        for (int x = 0; x < image.Width; x++)
        {
            // Each channel is a different linear mix of x and y, truncated to its low 8 bits.
            byte red = (byte)((x * 17) + (y * 13));
            byte green = (byte)((x * 29) + (y * 7));
            byte blue = (byte)((x * 11) + (y * 23));

            image[x, y] = new Rgba32(red, green, blue);
        }
    }

    return image;
}

// Cache stand-in that never stores anything: every lookup reports a miss.
private readonly struct UncachedCache : IColorIndexCache
{
    // The allocator is unused because there is nothing to allocate.
    public static UncachedCache Create(MemoryAllocator allocator)
    {
        return default;
    }

    // Reports success without recording the entry.
    public bool TryAdd(Rgba32 color, short value)
    {
        return true;
    }

    // Always a miss; value is set to -1.
    public bool TryGetValue(Rgba32 color, out short value)
    {
        value = -1;
        return false;
    }

    // No state to reset.
    public void Clear()
    {
    }

    // No resources to release.
    public void Dispose()
    {
    }
}
}