Browse Source

Merge pull request #3051 from SixLabors/js/webp-icc

Add ICC profile conversion support to WEBP
pull/3054/head
James Jackson-South 1 week ago
committed by GitHub
parent
commit
da6d56bc62
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 122
      src/ImageSharp/ColorProfiles/ColorProfileConverterExtensionsPixelCompatible.cs
  2. 184
      src/ImageSharp/ColorProfiles/Rgb.cs
  3. 1
      src/ImageSharp/Formats/Webp/WebpDecoderCore.cs
  4. 13
      tests/ImageSharp.Tests/Formats/WebP/WebpDecoderTests.cs
  5. 6
      tests/ImageSharp.Tests/TestImages.cs
  6. 3
      tests/Images/External/ReferenceOutput/WebpDecoderTests/Decode_WhenColorProfileHandlingIsConvert_ApplyIccProfile_Rgba32_Perceptual-cLUT-only.png
  7. 3
      tests/Images/External/ReferenceOutput/WebpDecoderTests/Decode_WhenColorProfileHandlingIsConvert_ApplyIccProfile_Rgba32_Perceptual.png
  8. 3
      tests/Images/Input/Webp/icc-profiles/Perceptual-cLUT-only.webp
  9. 3
      tests/Images/Input/Webp/icc-profiles/Perceptual.webp

122
src/ImageSharp/ColorProfiles/ColorProfileConverterExtensionsPixelCompatible.cs

@ -5,6 +5,8 @@ using System.Buffers;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
@ -60,8 +62,126 @@ internal static class ColorProfileConverterExtensionsPixelCompatible
converter.ConvertUsingIccProfile<Rgb, Rgb>(rgbSpan, rgbSpan);
// Copy the converted Rgb pixels back to the row as TPixel.
// Important: Preserve alpha from the existing row Vector4 values.
// We merge RGB from rgbSpan into row, leaving W untouched.
ref float srcRgb = ref Unsafe.As<Rgb, float>(ref MemoryMarshal.GetReference(rgbSpan));
ref float dstRow = ref Unsafe.As<Vector4, float>(ref MemoryMarshal.GetReference(row));
int count = rgbSpan.Length;
int i = 0;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static Vector512<float> ReadVector512(ref float f)
{
ref byte b = ref Unsafe.As<float, byte>(ref f);
return Unsafe.ReadUnaligned<Vector512<float>>(ref b);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static void WriteVector512(ref float f, Vector512<float> v)
{
ref byte b = ref Unsafe.As<float, byte>(ref f);
Unsafe.WriteUnaligned(ref b, v);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static Vector256<float> ReadVector256(ref float f)
{
ref byte b = ref Unsafe.As<float, byte>(ref f);
return Unsafe.ReadUnaligned<Vector256<float>>(ref b);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static void WriteVector256(ref float f, Vector256<float> v)
{
ref byte b = ref Unsafe.As<float, byte>(ref f);
Unsafe.WriteUnaligned(ref b, v);
}
if (Avx512F.IsSupported)
{
// 4 pixels per iteration.
//
// Source layout (Rgb float stream, 12 floats):
// [r0 g0 b0 r1 g1 b1 r2 g2 b2 r3 g3 b3]
//
// Destination layout (row Vector4 float stream, 16 floats):
// [r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3]
//
// We use an overlapped load (16 floats) from the 3-float stride source.
// The permute selects the RGB we need and inserts placeholders for alpha lanes.
//
// Then we blend RGB lanes into the existing destination, preserving alpha lanes.
Vector512<int> rgbPerm = Vector512.Create(0, 1, 2, 0, 3, 4, 5, 0, 6, 7, 8, 0, 9, 10, 11, 0);
// BlendVariable selects from the second operand where the sign bit of the mask lane is set.
// We want to overwrite lanes 0,1,2 then 4,5,6 then 8,9,10 then 12,13,14, and preserve lanes 3,7,11,15 (alpha).
Vector512<float> rgbSelect = Vector512.Create(-0F, -0F, -0F, 0F, -0F, -0F, -0F, 0F, -0F, -0F, -0F, 0F, -0F, -0F, -0F, 0F);
int quads = count >> 2;
int simdQuads = quads - 1; // Leave the last quad for the scalar tail to avoid the final overlapped load reading past the end.
for (int q = 0; q < simdQuads; q++)
{
Vector512<float> dst = ReadVector512(ref dstRow);
Vector512<float> src = ReadVector512(ref srcRgb);
Vector512<float> rgbx = Avx512F.PermuteVar16x32(src, rgbPerm);
Vector512<float> merged = Avx512F.BlendVariable(dst, rgbx, rgbSelect);
WriteVector512(ref dstRow, merged);
// Advance input by 4 pixels (4 * 3 = 12 floats)
srcRgb = ref Unsafe.Add(ref srcRgb, 12);
// Advance output by 4 pixels (4 * 4 = 16 floats)
dstRow = ref Unsafe.Add(ref dstRow, 16);
i += 4;
}
}
else if (Avx2.IsSupported)
{
// 2 pixels per iteration.
//
// Same idea as AVX-512, but on 256-bit vectors.
// We permute packed RGB into rgbx layout and blend into the existing destination,
// preserving alpha lanes.
Vector256<int> rgbPerm = Vector256.Create(0, 1, 2, 0, 3, 4, 5, 0);
Vector256<float> rgbSelect = Vector256.Create(-0F, -0F, -0F, 0F, -0F, -0F, -0F, 0F);
int pairs = count >> 1;
int simdPairs = pairs - 1; // Leave the last pair for the scalar tail to avoid the final overlapped load reading past the end.
for (int p = 0; p < simdPairs; p++)
{
Vector256<float> dst = ReadVector256(ref dstRow);
Vector256<float> src = ReadVector256(ref srcRgb);
Vector256<float> rgbx = Avx2.PermuteVar8x32(src, rgbPerm);
Vector256<float> merged = Avx.BlendVariable(dst, rgbx, rgbSelect);
WriteVector256(ref dstRow, merged);
// Advance input by 2 pixels (2 * 3 = 6 floats)
srcRgb = ref Unsafe.Add(ref srcRgb, 6);
// Advance output by 2 pixels (2 * 4 = 8 floats)
dstRow = ref Unsafe.Add(ref dstRow, 8);
i += 2;
}
}
// Scalar tail.
// Handles:
// - the last skipped SIMD block (quad or pair)
// - any remainder
//
// Preserve alpha by writing Vector3 into the Vector4 storage.
ref Vector4 rowRef = ref MemoryMarshal.GetReference(row);
for (int i = 0; i < rgbSpan.Length; i++)
for (; i < count; i++)
{
Vector3 rgb = rgbSpan[i].AsVector3Unsafe();
Unsafe.As<Vector4, Vector3>(ref Unsafe.Add(ref rowRef, (uint)i)) = rgb;

184
src/ImageSharp/ColorProfiles/Rgb.cs

@ -4,6 +4,8 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using SixLabors.ImageSharp.ColorProfiles.WorkingSpaces;
namespace SixLabors.ImageSharp.ColorProfiles;
@ -105,10 +107,87 @@ public readonly struct Rgb : IProfileConnectingSpace<Rgb, CieXyz>
{
Guard.DestinationShouldNotBeTooShort(source, destination, nameof(destination));
// TODO: Optimize via SIMD
for (int i = 0; i < source.Length; i++)
int length = source.Length;
if (length == 0)
{
destination[i] = source[i].ToScaledVector4();
return;
}
ref Rgb srcRgb = ref MemoryMarshal.GetReference(source);
ref Vector4 dstV4 = ref MemoryMarshal.GetReference(destination);
// Float streams:
// src: r0 g0 b0 r1 g1 b1 ...
// dst: r0 g0 b0 a0 r1 g1 b1 a1 ...
ref float src = ref Unsafe.As<Rgb, float>(ref srcRgb);
ref float dst = ref Unsafe.As<Vector4, float>(ref dstV4);
int i = 0;
if (Avx512F.IsSupported)
{
// 4 pixels per iteration. Using overlapped 16-float loads.
Vector512<int> perm = Vector512.Create(0, 1, 2, 0, 3, 4, 5, 0, 6, 7, 8, 0, 9, 10, 11, 0);
Vector512<float> ones = Vector512.Create(1F);
// BlendVariable selects from 'ones' where the sign-bit of mask lane is set.
// Using -0f sets only the sign bit, producing an efficient "select lane" mask.
Vector512<float> alphaSelect = Vector512.Create(0F, 0F, 0F, -0F, 0F, 0F, 0F, -0F, 0F, 0F, 0F, -0F, 0F, 0F, 0F, -0F);
int quads = length >> 2;
// Leave the last quad (4 pixels) for the scalar tail.
int simdQuads = quads - 1;
for (int q = 0; q < simdQuads; q++)
{
Vector512<float> v = ReadVector512(ref src);
Vector512<float> rgbx = Avx512F.PermuteVar16x32(v, perm);
Vector512<float> rgba = Avx512F.BlendVariable(rgbx, ones, alphaSelect);
WriteVector512(ref dst, rgba);
src = ref Unsafe.Add(ref src, 12);
dst = ref Unsafe.Add(ref dst, 16);
i += 4;
}
}
else if (Avx2.IsSupported)
{
// 2 pixels per iteration. Using overlapped 8-float loads.
Vector256<int> perm = Vector256.Create(0, 1, 2, 0, 3, 4, 5, 0);
Vector256<float> ones = Vector256.Create(1F);
// vblendps mask: bit i selects lane i from 'ones' when set.
// We want lanes 3 and 7 -> 0b10001000 = 0x88.
const byte alphaMask = 0x88;
int pairs = length >> 1;
// Leave the last pair (2 pixels) for the scalar tail.
int simdPairs = pairs - 1;
for (int p = 0; p < simdPairs; p++)
{
Vector256<float> v = ReadVector256(ref src);
Vector256<float> rgbx = Avx2.PermuteVar8x32(v, perm);
Vector256<float> rgba = Avx.Blend(rgbx, ones, alphaMask);
WriteVector256(ref dst, rgba);
src = ref Unsafe.Add(ref src, 6);
dst = ref Unsafe.Add(ref dst, 8);
i += 2;
}
}
// Tail (and non-AVX paths)
for (; i < length; i++)
{
Unsafe.Add(ref dstV4, i) = Unsafe.Add(ref srcRgb, i).ToScaledVector4();
}
}
@ -117,10 +196,75 @@ public readonly struct Rgb : IProfileConnectingSpace<Rgb, CieXyz>
{
Guard.DestinationShouldNotBeTooShort(source, destination, nameof(destination));
// TODO: Optimize via SIMD
for (int i = 0; i < source.Length; i++)
int length = source.Length;
if (length == 0)
{
destination[i] = FromScaledVector4(source[i]);
return;
}
ref Vector4 srcV4 = ref MemoryMarshal.GetReference(source);
ref Rgb dstRgb = ref MemoryMarshal.GetReference(destination);
// Float streams:
// src: r0 g0 b0 a0 r1 g1 b1 a1 ...
// dst: r0 g0 b0 r1 g1 b1 ...
ref float src = ref Unsafe.As<Vector4, float>(ref srcV4);
ref float dst = ref Unsafe.As<Rgb, float>(ref dstRgb);
int i = 0;
if (Avx512F.IsSupported)
{
// 4 pixels per iteration. Using overlapped 16-float stores:
Vector512<int> idx = Vector512.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15);
// Number of 4-pixel groups in the input.
int quads = length >> 2;
// Leave the last quad (4 pixels) for the scalar tail.
int simdQuads = quads - 1;
for (int q = 0; q < simdQuads; q++)
{
Vector512<float> v = ReadVector512(ref src);
Vector512<float> packed = Avx512F.PermuteVar16x32(v, idx);
WriteVector512(ref dst, packed);
src = ref Unsafe.Add(ref src, 16);
dst = ref Unsafe.Add(ref dst, 12);
i += 4;
}
}
else if (Avx2.IsSupported)
{
// 2 pixels per iteration, using overlapped 8-float stores:
Vector256<int> idx = Vector256.Create(0, 1, 2, 4, 5, 6, 0, 0);
int pairs = length >> 1;
// Leave the last pair (2 pixels) for the scalar tail.
int simdPairs = pairs - 1;
int pairIndex = 0;
for (; pairIndex < simdPairs; pairIndex++)
{
Vector256<float> v = ReadVector256(ref src);
Vector256<float> packed = Avx2.PermuteVar8x32(v, idx);
WriteVector256(ref dst, packed);
src = ref Unsafe.Add(ref src, 8);
dst = ref Unsafe.Add(ref dst, 6);
i += 2;
}
}
// Tail (and non-AVX paths)
for (; i < length; i++)
{
Vector4 v = Unsafe.Add(ref srcV4, i);
Unsafe.Add(ref dstRgb, i) = FromScaledVector4(v);
}
}
@ -288,4 +432,32 @@ public readonly struct Rgb : IProfileConnectingSpace<Rgb, CieXyz>
M44 = 1F
};
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector512<float> ReadVector512(ref float src)
{
ref byte b = ref Unsafe.As<float, byte>(ref src);
return Unsafe.ReadUnaligned<Vector512<float>>(ref b);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<float> ReadVector256(ref float src)
{
ref byte b = ref Unsafe.As<float, byte>(ref src);
return Unsafe.ReadUnaligned<Vector256<float>>(ref b);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void WriteVector512(ref float dst, Vector512<float> value)
{
ref byte b = ref Unsafe.As<float, byte>(ref dst);
Unsafe.WriteUnaligned(ref b, value);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void WriteVector256(ref float dst, Vector256<float> value)
{
ref byte b = ref Unsafe.As<float, byte>(ref dst);
Unsafe.WriteUnaligned(ref b, value);
}
}

1
src/ImageSharp/Formats/Webp/WebpDecoderCore.cs

@ -122,6 +122,7 @@ internal sealed class WebpDecoderCore : ImageDecoderCore, IDisposable
this.ParseOptionalChunks(stream, metadata, this.webImageInfo.Features, buffer);
}
_ = this.TryConvertIccProfile(image);
return image;
}
}

13
tests/ImageSharp.Tests/Formats/WebP/WebpDecoderTests.cs

@ -608,4 +608,17 @@ public class WebpDecoderTests
image.DebugSave(provider);
image.CompareToOriginal(provider, ReferenceDecoder);
}
[Theory]
[WithFile(Icc.Perceptual, PixelTypes.Rgba32)]
[WithFile(Icc.PerceptualcLUTOnly, PixelTypes.Rgba32)]
public void Decode_WhenColorProfileHandlingIsConvert_ApplyIccProfile<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel>
{
using Image<TPixel> image = provider.GetImage(WebpDecoder.Instance, new DecoderOptions { ColorProfileHandling = ColorProfileHandling.Convert });
image.DebugSave(provider);
image.CompareToReferenceOutput(provider);
Assert.Null(image.Metadata.IccProfile);
}
}

6
tests/ImageSharp.Tests/TestImages.cs

@ -901,6 +901,12 @@ public static class TestImages
public const string AlphaBlend2 = "Webp/alpha-blend-2.webp";
public const string AlphaBlend3 = "Webp/alpha-blend-3.webp";
public const string AlphaBlend4 = "Webp/alpha-blend-4.webp";
public static class Icc
{
public const string Perceptual = "Webp/icc-profiles/Perceptual.webp";
public const string PerceptualcLUTOnly = "Webp/icc-profiles/Perceptual-cLUT-only.webp";
}
}
public static class Tiff

3
tests/Images/External/ReferenceOutput/WebpDecoderTests/Decode_WhenColorProfileHandlingIsConvert_ApplyIccProfile_Rgba32_Perceptual-cLUT-only.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9a2cbd483eed31cf7b410ef1ee45ae78e77b36e5b9c31f87764a1dfdb9c4e5c8
size 79768

3
tests/Images/External/ReferenceOutput/WebpDecoderTests/Decode_WhenColorProfileHandlingIsConvert_ApplyIccProfile_Rgba32_Perceptual.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:547f3ddb5bdb3a3cb5c87440b65728be295c2b5f3c10a1b0b44299bb3d80e8d7
size 79651

3
tests/Images/Input/Webp/icc-profiles/Perceptual-cLUT-only.webp

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7b7ef9eedd1b2a4b93f11ac52807c071f7d0a24513008c3e69b0d4a0fd9b70db
size 186596

3
tests/Images/Input/Webp/icc-profiles/Perceptual.webp

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aad13221b103b60c21a19e7ecfbab047404f25269485d26fc5dee4be11188865
size 189800
Loading…
Cancel
Save