Browse Source

Merge branch 'main' into heic-support

pull/2633/head
James Jackson-South 2 years ago
committed by GitHub
parent
commit
43569dbbe9
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 13
      .github/workflows/build-and-test.yml
  2. 10
      .github/workflows/code-coverage.yml
  3. 1
      ImageSharp.sln
  4. 93
      src/ImageSharp/Color/Color.cs
  5. 40
      src/ImageSharp/Common/Helpers/Numerics.cs
  6. 78
      src/ImageSharp/Common/Helpers/SimdUtils.Convert.cs
  7. 182
      src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
  8. 144
      src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs
  9. 335
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  10. 118
      src/ImageSharp/Common/Helpers/SimdUtils.cs
  11. 74
      src/ImageSharp/Common/Helpers/Vector128Utilities.cs
  12. 39
      src/ImageSharp/Common/Helpers/Vector256Utilities.cs
  13. 37
      src/ImageSharp/Common/Helpers/Vector512Utilities.cs
  14. 4
      src/ImageSharp/Formats/Gif/GifFrameMetadata.cs
  15. 42
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
  16. 32
      src/ImageSharp/Formats/Jpeg/JpegComData.cs
  17. 23
      src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs
  18. 52
      src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
  19. 7
      src/ImageSharp/Formats/Jpeg/JpegMetadata.cs
  20. 1
      src/ImageSharp/Formats/Jpeg/MetadataExtensions.cs
  21. 47
      src/ImageSharp/Formats/Png/PngDecoderCore.cs
  22. 6
      src/ImageSharp/Formats/Png/PngDecoderOptions.cs
  23. 20
      src/ImageSharp/Formats/Png/PngEncoderCore.cs
  24. 26
      src/ImageSharp/Formats/Webp/AlphaDecoder.cs
  25. 16
      src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs
  26. 75
      src/ImageSharp/PixelFormats/Utils/Vector4Converters.RgbaCompatible.cs
  27. 4
      src/ImageSharp/Processing/Processors/Drawing/DrawImageProcessor{TPixelBg,TPixelFg}.cs
  28. 102
      tests/ImageSharp.Benchmarks/Bulk/FromVector4.cs
  29. 66
      tests/ImageSharp.Benchmarks/Bulk/FromVector4_Rgb24.cs
  30. 25
      tests/ImageSharp.Benchmarks/Bulk/ToVector4.cs
  31. 4
      tests/ImageSharp.Benchmarks/Bulk/ToVector4_Bgra32.cs
  32. 4
      tests/ImageSharp.Benchmarks/Bulk/ToVector4_Rgb24.cs
  33. 72
      tests/ImageSharp.Benchmarks/Bulk/ToVector4_Rgba32.cs
  34. 2
      tests/ImageSharp.Benchmarks/LoadResizeSave/README.md
  35. 2
      tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs
  36. 59
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
  37. 15
      tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Metadata.cs
  38. 153
      tests/ImageSharp.Tests/Formats/Jpg/JpegEncoderTests.Metadata.cs
  39. 22
      tests/ImageSharp.Tests/Formats/Jpg/JpegMetadataTests.cs
  40. 26
      tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs
  41. 17
      tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs
  42. 11
      tests/ImageSharp.Tests/Formats/WebP/WebpDecoderTests.cs
  43. 10
      tests/ImageSharp.Tests/TestImages.cs
  44. 22
      tests/ImageSharp.Tests/TestUtilities/BasicSerializer.cs
  45. 81
      tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs
  46. 3
      tests/Images/External/ReferenceOutput/PngEncoderTests/Issue2668_Quantized_Encode_Alpha_Rgba32_Issue_2668.png
  47. 3
      tests/Images/Input/Jpg/issues/issue-2067-comment.jpg
  48. 3
      tests/Images/Input/Png/issues/Issue_2666.png
  49. 3
      tests/Images/Input/Png/issues/Issue_2668.png
  50. 3
      tests/Images/Input/Png/issues/bad-ztxt.png
  51. 3
      tests/Images/Input/Png/issues/bad-ztxt2.png
  52. 3
      tests/Images/Input/Webp/issues/Issue2670.webp

13
.github/workflows/build-and-test.yml

@ -4,6 +4,7 @@ on:
push:
branches:
- main
- release/*
tags:
- "v*"
pull_request:
@ -67,7 +68,7 @@ jobs:
run: git lfs ls-files -l | cut -d' ' -f1 | sort > .lfs-assets-id
- name: Git Setup LFS Cache
uses: actions/cache@v3
uses: actions/cache@v4
id: lfs-cache
with:
path: .git/lfs
@ -77,10 +78,10 @@ jobs:
run: git lfs pull
- name: NuGet Install
uses: NuGet/setup-nuget@v1
uses: NuGet/setup-nuget@v2
- name: NuGet Setup Cache
uses: actions/cache@v3
uses: actions/cache@v4
id: nuget-cache
with:
path: ~/.nuget
@ -132,7 +133,7 @@ jobs:
XUNIT_PATH: .\tests\ImageSharp.Tests # Required for xunit
- name: Export Failed Output
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
if: failure()
with:
name: actual_output_${{ runner.os }}_${{ matrix.options.framework }}${{ matrix.options.runtime }}.zip
@ -159,10 +160,10 @@ jobs:
submodules: recursive
- name: NuGet Install
uses: NuGet/setup-nuget@v1
uses: NuGet/setup-nuget@v2
- name: NuGet Setup Cache
uses: actions/cache@v3
uses: actions/cache@v4
id: nuget-cache
with:
path: ~/.nuget

10
.github/workflows/code-coverage.yml

@ -34,7 +34,7 @@ jobs:
run: git lfs ls-files -l | cut -d' ' -f1 | sort > .lfs-assets-id
- name: Git Setup LFS Cache
uses: actions/cache@v3
uses: actions/cache@v4
id: lfs-cache
with:
path: .git/lfs
@ -44,10 +44,10 @@ jobs:
run: git lfs pull
- name: NuGet Install
uses: NuGet/setup-nuget@v1
uses: NuGet/setup-nuget@v2
- name: NuGet Setup Cache
uses: actions/cache@v3
uses: actions/cache@v4
id: nuget-cache
with:
path: ~/.nuget
@ -74,14 +74,14 @@ jobs:
XUNIT_PATH: .\tests\ImageSharp.Tests # Required for xunit
- name: Export Failed Output
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
if: failure()
with:
name: actual_output_${{ runner.os }}_${{ matrix.options.framework }}${{ matrix.options.runtime }}.zip
path: tests/Images/ActualOutput/
- name: Codecov Update
uses: codecov/codecov-action@v3
uses: codecov/codecov-action@v4
if: matrix.options.codecov == true && startsWith(github.repository, 'SixLabors')
with:
flags: unittests

1
ImageSharp.sln

@ -238,6 +238,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "issues", "issues", "{5C9B68
tests\Images\Input\Jpg\issues\issue750-exif-tranform.jpg = tests\Images\Input\Jpg\issues\issue750-exif-tranform.jpg
tests\Images\Input\Jpg\issues\Issue845-Incorrect-Quality99.jpg = tests\Images\Input\Jpg\issues\Issue845-Incorrect-Quality99.jpg
tests\Images\Input\Jpg\issues\issue855-incorrect-colorspace.jpg = tests\Images\Input\Jpg\issues\issue855-incorrect-colorspace.jpg
tests\Images\Input\Jpg\issues\issue-2067-comment.jpg = tests\Images\Input\Jpg\issues\issue-2067-comment.jpg
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "fuzz", "fuzz", "{516A3532-6AC2-417B-AD79-9BD5D0D378A0}"

93
src/ImageSharp/Color/Color.cs

@ -25,7 +25,7 @@ public readonly partial struct Color : IEquatable<Color>
/// Initializes a new instance of the <see cref="Color"/> struct.
/// </summary>
/// <param name="vector">The <see cref="Vector4"/> containing the color information.</param>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private Color(Vector4 vector)
{
this.data = Numerics.Clamp(vector, Vector4.Zero, Vector4.One);
@ -36,28 +36,13 @@ public readonly partial struct Color : IEquatable<Color>
/// Initializes a new instance of the <see cref="Color"/> struct.
/// </summary>
/// <param name="pixel">The pixel containing color information.</param>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private Color(IPixel pixel)
{
this.boxedHighPrecisionPixel = pixel;
this.data = default;
}
/// <summary>
/// Converts a <see cref="Color"/> to <see cref="Vector4"/>.
/// </summary>
/// <param name="color">The <see cref="Color"/>.</param>
/// <returns>The <see cref="Vector4"/>.</returns>
public static explicit operator Vector4(Color color) => color.ToScaledVector4();
/// <summary>
/// Converts an <see cref="Vector4"/> to <see cref="Color"/>.
/// </summary>
/// <param name="source">The <see cref="Vector4"/>.</param>
/// <returns>The <see cref="Color"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static explicit operator Color(Vector4 source) => new(source);
/// <summary>
/// Checks whether two <see cref="Color"/> structures are equal.
/// </summary>
@ -67,7 +52,7 @@ public readonly partial struct Color : IEquatable<Color>
/// True if the <paramref name="left"/> parameter is equal to the <paramref name="right"/> parameter;
/// otherwise, false.
/// </returns>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator ==(Color left, Color right) => left.Equals(right);
/// <summary>
@ -79,36 +64,44 @@ public readonly partial struct Color : IEquatable<Color>
/// True if the <paramref name="left"/> parameter is not equal to the <paramref name="right"/> parameter;
/// otherwise, false.
/// </returns>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator !=(Color left, Color right) => !left.Equals(right);
/// <summary>
/// Creates a <see cref="Color"/> from the given <typeparamref name="TPixel"/>.
/// </summary>
/// <param name="pixel">The pixel to convert from.</param>
/// <param name="source">The pixel to convert from.</param>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <returns>The <see cref="Color"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static Color FromPixel<TPixel>(TPixel pixel)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Color FromPixel<TPixel>(TPixel source)
where TPixel : unmanaged, IPixel<TPixel>
{
// Avoid boxing in case we can convert to Vector4 safely and efficiently
PixelTypeInfo info = TPixel.GetPixelTypeInfo();
if (info.ComponentInfo.HasValue && info.ComponentInfo.Value.GetMaximumComponentPrecision() <= (int)PixelComponentBitDepth.Bit32)
{
return new(pixel.ToScaledVector4());
return new(source.ToScaledVector4());
}
return new(pixel);
return new(source);
}
/// <summary>
/// Creates a <see cref="Color"/> from a generic scaled <see cref="Vector4"/>.
/// </summary>
/// <param name="source">The vector to load the pixel from.</param>
/// <returns>The <see cref="Color"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Color FromScaledVector(Vector4 source) => new(source);
/// <summary>
/// Bulk converts a span of a specified <typeparamref name="TPixel"/> type to a span of <see cref="Color"/>.
/// </summary>
/// <typeparam name="TPixel">The pixel type to convert to.</typeparam>
/// <param name="source">The source pixel span.</param>
/// <param name="destination">The destination color span.</param>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void FromPixel<TPixel>(ReadOnlySpan<TPixel> source, Span<Color> destination)
where TPixel : unmanaged, IPixel<TPixel>
{
@ -120,7 +113,7 @@ public readonly partial struct Color : IEquatable<Color>
{
for (int i = 0; i < destination.Length; i++)
{
destination[i] = new(source[i].ToScaledVector4());
destination[i] = FromScaledVector(source[i].ToScaledVector4());
}
}
else
@ -143,7 +136,7 @@ public readonly partial struct Color : IEquatable<Color>
/// <returns>
/// The <see cref="Color"/>.
/// </returns>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Color ParseHex(string hex)
{
Rgba32 rgba = Rgba32.ParseHex(hex);
@ -162,7 +155,7 @@ public readonly partial struct Color : IEquatable<Color>
/// <returns>
/// The <see cref="bool"/>.
/// </returns>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool TryParseHex(string hex, out Color result)
{
result = default;
@ -236,16 +229,16 @@ public readonly partial struct Color : IEquatable<Color>
/// <returns>The color having its alpha channel altered.</returns>
public Color WithAlpha(float alpha)
{
Vector4 v = (Vector4)this;
Vector4 v = this.ToScaledVector4();
v.W = alpha;
return new Color(v);
return FromScaledVector(v);
}
/// <summary>
/// Gets the hexadecimal representation of the color instance in rrggbbaa form.
/// </summary>
/// <returns>A hexadecimal string representation of the value.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ToHex()
{
if (this.boxedHighPrecisionPixel is not null)
@ -263,8 +256,8 @@ public readonly partial struct Color : IEquatable<Color>
/// Converts the color instance to a specified <typeparamref name="TPixel"/> type.
/// </summary>
/// <typeparam name="TPixel">The pixel type to convert to.</typeparam>
/// <returns>The pixel value.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
/// <returns>The <typeparamref name="TPixel"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public TPixel ToPixel<TPixel>()
where TPixel : unmanaged, IPixel<TPixel>
{
@ -281,13 +274,30 @@ public readonly partial struct Color : IEquatable<Color>
return TPixel.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
}
/// <summary>
/// Expands the color into a generic ("scaled") <see cref="Vector4"/> representation
/// with values scaled and clamped between <value>0</value> and <value>1</value>.
/// The vector components are typically expanded in least to greatest significance order.
/// </summary>
/// <returns>The <see cref="Vector4"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Vector4 ToScaledVector4()
{
if (this.boxedHighPrecisionPixel is null)
{
return this.data;
}
return this.boxedHighPrecisionPixel.ToScaledVector4();
}
/// <summary>
/// Bulk converts a span of <see cref="Color"/> to a span of a specified <typeparamref name="TPixel"/> type.
/// </summary>
/// <typeparam name="TPixel">The pixel type to convert to.</typeparam>
/// <param name="source">The source color span.</param>
/// <param name="destination">The destination pixel span.</param>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void ToPixel<TPixel>(ReadOnlySpan<Color> source, Span<TPixel> destination)
where TPixel : unmanaged, IPixel<TPixel>
{
@ -301,7 +311,7 @@ public readonly partial struct Color : IEquatable<Color>
}
/// <inheritdoc />
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool Equals(Color other)
{
if (this.boxedHighPrecisionPixel is null && other.boxedHighPrecisionPixel is null)
@ -316,7 +326,7 @@ public readonly partial struct Color : IEquatable<Color>
public override bool Equals(object? obj) => obj is Color other && this.Equals(other);
/// <inheritdoc />
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public override int GetHashCode()
{
if (this.boxedHighPrecisionPixel is null)
@ -326,15 +336,4 @@ public readonly partial struct Color : IEquatable<Color>
return this.boxedHighPrecisionPixel.GetHashCode();
}
[MethodImpl(InliningOptions.ShortMethod)]
private Vector4 ToScaledVector4()
{
if (this.boxedHighPrecisionPixel is null)
{
return this.data;
}
return this.boxedHighPrecisionPixel.ToScaledVector4();
}
}

40
src/ImageSharp/Common/Helpers/Numerics.cs

@ -1010,6 +1010,26 @@ internal static class Numerics
where TVector : struct
=> (uint)span.Length / (uint)Vector256<TVector>.Count;
/// <summary>
/// Gets the number of 512-bit vector blocks covered by the given span, computed as the
/// span length divided by <c>Vector512&lt;TVector&gt;.Count</c> (integer division).
/// </summary>
/// <typeparam name="TVector">The element type of the vector.</typeparam>
/// <param name="span">The span to measure.</param>
/// <returns>The span length divided by the vector lane count, rounded down.</returns>
public static nuint Vector512Count<TVector>(this Span<byte> span)
    where TVector : struct
{
    // Both operands are treated as unsigned so the division is an unsigned one.
    uint elementCount = (uint)span.Length;
    uint lanesPerVector = (uint)Vector512<TVector>.Count;
    return elementCount / lanesPerVector;
}
/// <summary>
/// Gets the number of 512-bit vector blocks covered by the given read-only span, computed as the
/// span length divided by <c>Vector512&lt;TVector&gt;.Count</c> (integer division).
/// </summary>
/// <typeparam name="TVector">The element type of the vector.</typeparam>
/// <param name="span">The read-only span to measure.</param>
/// <returns>The span length divided by the vector lane count, rounded down.</returns>
public static nuint Vector512Count<TVector>(this ReadOnlySpan<byte> span)
    where TVector : struct
{
    // Both operands are treated as unsigned so the division is an unsigned one.
    uint elementCount = (uint)span.Length;
    uint lanesPerVector = (uint)Vector512<TVector>.Count;
    return elementCount / lanesPerVector;
}
/// <summary>
/// Gets the count of vectors that safely fit into the given span.
/// </summary>
@ -1049,4 +1069,24 @@ internal static class Numerics
public static nuint Vector256Count<TVector>(int length)
where TVector : struct
=> (uint)length / (uint)Vector256<TVector>.Count;
/// <summary>
/// Gets the number of 512-bit vector blocks covered by the given span of floats, computed as the
/// span length divided by <c>Vector512&lt;TVector&gt;.Count</c> (integer division).
/// </summary>
/// <typeparam name="TVector">The element type of the vector.</typeparam>
/// <param name="span">The span to measure.</param>
/// <returns>The span length divided by the vector lane count, rounded down.</returns>
public static nuint Vector512Count<TVector>(this Span<float> span)
    where TVector : struct
{
    // Both operands are treated as unsigned so the division is an unsigned one.
    uint elementCount = (uint)span.Length;
    uint lanesPerVector = (uint)Vector512<TVector>.Count;
    return elementCount / lanesPerVector;
}
/// <summary>
/// Gets the number of 512-bit vector blocks covered by the given length, computed as
/// <paramref name="length"/> divided by <c>Vector512&lt;TVector&gt;.Count</c> (integer division).
/// </summary>
/// <typeparam name="TVector">The element type of the vector.</typeparam>
/// <param name="length">The element count to measure.</param>
/// <returns>The length divided by the vector lane count, rounded down.</returns>
public static nuint Vector512Count<TVector>(int length)
    where TVector : struct
{
    // Both operands are treated as unsigned so the division is an unsigned one.
    uint elementCount = (uint)length;
    uint lanesPerVector = (uint)Vector512<TVector>.Count;
    return elementCount / lanesPerVector;
}
}

78
src/ImageSharp/Common/Helpers/SimdUtils.Convert.cs

@ -0,0 +1,78 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp;
internal static partial class SimdUtils
{
    /// <summary>
    /// Converts every <see cref="byte"/> of <paramref name="source"/> to a <see cref="float"/> normalized into [0..1].
    /// <paramref name="source"/> should be of the same size as <paramref name="destination"/>,
    /// but there are no restrictions on the span's length.
    /// </summary>
    /// <param name="source">The source span of bytes</param>
    /// <param name="destination">The destination span of floats</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> destination)
    {
        DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");

        // The reducer receives both spans by reference; whatever is left in them afterwards
        // is converted by the scalar remainder routine.
        HwIntrinsics.ByteToNormalizedFloatReduce(ref source, ref destination);

        if (source.IsEmpty)
        {
            return;
        }

        ConvertByteToNormalizedFloatRemainder(source, destination);
    }

    /// <summary>
    /// Converts every <see cref="float"/> of <paramref name="source"/>, normalized into [0..1],
    /// into the <paramref name="destination"/> buffer of <see cref="byte"/>.
    /// The values are scaled up into [0-255] and rounded, overflows are clamped.
    /// <paramref name="source"/> should be of the same size as <paramref name="destination"/>,
    /// but there are no restrictions on the span's length.
    /// </summary>
    /// <param name="source">The source span of floats</param>
    /// <param name="destination">The destination span of bytes</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal static void NormalizedFloatToByteSaturate(ReadOnlySpan<float> source, Span<byte> destination)
    {
        DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");

        // The reducer receives both spans by reference; whatever is left in them afterwards
        // is converted by the scalar remainder routine.
        HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref destination);

        if (source.IsEmpty)
        {
            return;
        }

        ConvertNormalizedFloatToByteRemainder(source, destination);
    }

    /// <summary>
    /// Scalar conversion of each byte in <paramref name="source"/> to a float by dividing by 255.
    /// </summary>
    /// <param name="source">The bytes to convert.</param>
    /// <param name="destination">The buffer receiving one float per source byte.</param>
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan<byte> source, Span<float> destination)
    {
        ref byte input = ref MemoryMarshal.GetReference(source);
        ref float output = ref MemoryMarshal.GetReference(destination);
        nuint count = (uint)source.Length;

        // Unchecked element access: the loop bound comes from the source length only.
        for (nuint index = 0; index < count; index++)
        {
            byte value = Unsafe.Add(ref input, index);
            Unsafe.Add(ref output, index) = value / 255f;
        }
    }

    /// <summary>
    /// Scalar conversion of each float in <paramref name="source"/> to a byte via <see cref="ConvertToByte"/>.
    /// </summary>
    /// <param name="source">The floats to convert.</param>
    /// <param name="destination">The buffer receiving one byte per source float.</param>
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static void ConvertNormalizedFloatToByteRemainder(ReadOnlySpan<float> source, Span<byte> destination)
    {
        ref float input = ref MemoryMarshal.GetReference(source);
        ref byte output = ref MemoryMarshal.GetReference(destination);
        nuint count = (uint)source.Length;

        // Unchecked element access: the loop bound comes from the source length only.
        for (nuint index = 0; index < count; index++)
        {
            float value = Unsafe.Add(ref input, index);
            Unsafe.Add(ref output, index) = ConvertToByte(value);
        }
    }

    /// <summary>
    /// Scales <paramref name="f"/> by 255, adds 0.5, clamps the result to [0, 255] and casts it to a byte.
    /// </summary>
    /// <param name="f">The value to convert.</param>
    /// <returns>The converted <see cref="byte"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static byte ConvertToByte(float f)
    {
        float scaled = (f * 255f) + 0.5f;
        return (byte)Numerics.Clamp(scaled, 0, 255f);
    }
}

182
src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs

@ -1,182 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// ReSharper disable MemberHidesStaticFromOuterClass
namespace SixLabors.ImageSharp;
internal static partial class SimdUtils
{
/// <summary>
/// Implementation methods based on newer <see cref="Vector{T}"/> API-s (Vector.Widen, Vector.Narrow, Vector.ConvertTo*).
/// Only accelerated on RyuJIT having dotnet/coreclr#10662 merged (.NET Core 2.1+ .NET 4.7.2+)
/// See:
/// https://github.com/dotnet/coreclr/pull/10662
/// API Proposal:
/// https://github.com/dotnet/corefx/issues/15957
/// </summary>
public static class ExtendedIntrinsics
{
/// <summary>
/// Gets a value indicating whether these methods can be used; mirrors <see cref="Vector.IsHardwareAccelerated"/>.
/// </summary>
public static bool IsAvailable { get; } = Vector.IsHardwareAccelerated;
/// <summary>
/// Widen and convert a vector of <see cref="short"/> values into 2 vectors of <see cref="float"/>-s.
/// </summary>
/// <param name="source">The vector of <see cref="short"/> values to convert.</param>
/// <param name="dest1">Receives the conversion of the first widened half.</param>
/// <param name="dest2">Receives the conversion of the second widened half.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void ConvertToSingle(
Vector<short> source,
out Vector<float> dest1,
out Vector<float> dest2)
{
Vector.Widen(source, out Vector<int> i1, out Vector<int> i2);
dest1 = Vector.ConvertToSingle(i1);
dest2 = Vector.ConvertToSingle(i2);
}
/// <summary>
/// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
/// <param name="source">The source bytes; on return, sliced to the elements that were not converted.</param>
/// <param name="dest">The destination floats; on return, sliced by the same amount as <paramref name="source"/>.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void ByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source,
ref Span<float> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
// Without hardware acceleration nothing is converted and both spans are left untouched.
if (!IsAvailable)
{
return;
}
// NOTE(review): Numerics.ModuloP2 is defined elsewhere; presumably source.Length modulo Vector<byte>.Count — confirm.
int remainder = Numerics.ModuloP2(source.Length, Vector<byte>.Count);
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
// Convert the leading adjustedCount elements, then advance both spans past them.
ByteToNormalizedFloat(source[..adjustedCount], dest[..adjustedCount]);
source = source[adjustedCount..];
dest = dest[adjustedCount..];
}
}
/// <summary>
/// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
/// <param name="source">The source floats; on return, sliced to the elements that were not converted.</param>
/// <param name="dest">The destination bytes; on return, sliced by the same amount as <paramref name="source"/>.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
// Without hardware acceleration nothing is converted and both spans are left untouched.
if (!IsAvailable)
{
return;
}
// NOTE(review): Numerics.ModuloP2 is defined elsewhere; presumably source.Length modulo Vector<byte>.Count — confirm.
int remainder = Numerics.ModuloP2(source.Length, Vector<byte>.Count);
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
// Convert the leading adjustedCount elements, then advance both spans past them.
NormalizedFloatToByteSaturate(source[..adjustedCount], dest[..adjustedCount]);
source = source[adjustedCount..];
dest = dest[adjustedCount..];
}
}
/// <summary>
/// Implementation <see cref="SimdUtils.ByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
/// </summary>
/// <param name="source">The source bytes.</param>
/// <param name="dest">The destination floats.</param>
internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
// NOTE(review): VerifySpanInput is defined elsewhere; presumably validates the span lengths against Vector<byte>.Count — confirm.
VerifySpanInput(source, dest, Vector<byte>.Count);
// NOTE(review): VectorCount is defined elsewhere; used here as the number of Vector<byte> blocks to process — confirm.
nuint n = dest.VectorCount<byte>();
// Reinterpret the span memory as vectors so whole vectors can be read and written per iteration.
ref Vector<byte> sourceBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector<float> destBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dest));
for (nuint i = 0; i < n; i++)
{
Vector<byte> b = Unsafe.Add(ref sourceBase, i);
// byte -> ushort -> uint: one byte vector becomes four uint vectors.
Vector.Widen(b, out Vector<ushort> s0, out Vector<ushort> s1);
Vector.Widen(s0, out Vector<uint> w0, out Vector<uint> w1);
Vector.Widen(s1, out Vector<uint> w2, out Vector<uint> w3);
// Convert to float and scale by 1/255 (see the private ConvertToSingle overload).
Vector<float> f0 = ConvertToSingle(w0);
Vector<float> f1 = ConvertToSingle(w1);
Vector<float> f2 = ConvertToSingle(w2);
Vector<float> f3 = ConvertToSingle(w3);
// Each source vector produces four destination vectors, hence the i * 4 offset.
ref Vector<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
}
/// <summary>
/// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
/// </summary>
/// <param name="source">The source floats.</param>
/// <param name="dest">The destination bytes.</param>
internal static void NormalizedFloatToByteSaturate(
ReadOnlySpan<float> source,
Span<byte> dest)
{
// NOTE(review): VerifySpanInput is defined elsewhere; presumably validates the span lengths against Vector<byte>.Count — confirm.
VerifySpanInput(source, dest, Vector<byte>.Count);
// NOTE(review): VectorCount is defined elsewhere; used here as the number of Vector<byte> blocks to produce — confirm.
nuint n = dest.VectorCount<byte>();
// Reinterpret the span memory as vectors so whole vectors can be read and written per iteration.
ref Vector<float> sourceBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
ref Vector<byte> destBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(dest));
for (nuint i = 0; i < n; i++)
{
// Four float vectors are consumed for every byte vector written, hence the i * 4 offset.
ref Vector<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector<float> f0 = s;
Vector<float> f1 = Unsafe.Add(ref s, 1);
Vector<float> f2 = Unsafe.Add(ref s, 2);
Vector<float> f3 = Unsafe.Add(ref s, 3);
// Scale, round and clamp to [0, 255] (see ConvertToUInt32).
Vector<uint> w0 = ConvertToUInt32(f0);
Vector<uint> w1 = ConvertToUInt32(f1);
Vector<uint> w2 = ConvertToUInt32(f2);
Vector<uint> w3 = ConvertToUInt32(f3);
// uint -> ushort -> byte: four uint vectors are narrowed into one byte vector.
var u0 = Vector.Narrow(w0, w1);
var u1 = Vector.Narrow(w2, w3);
Unsafe.Add(ref destBase, i) = Vector.Narrow(u0, u1);
}
}
/// <summary>
/// Multiplies the input by 255, adds 0.5, clamps the result to [0, 255],
/// converts it to <see cref="int"/> and reinterprets the result as <see cref="uint"/>.
/// </summary>
/// <param name="vf">The float vector to convert.</param>
/// <returns>The converted vector.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<uint> ConvertToUInt32(Vector<float> vf)
{
var maxBytes = new Vector<float>(255f);
vf *= maxBytes;
vf += new Vector<float>(0.5f);
vf = Vector.Min(Vector.Max(vf, Vector<float>.Zero), maxBytes);
var vi = Vector.ConvertToInt32(vf);
return Vector.AsVectorUInt32(vi);
}
/// <summary>
/// Reinterprets the input as <see cref="int"/>, converts it to <see cref="float"/>
/// and multiplies the result by 1/255.
/// </summary>
/// <param name="u">The unsigned integer vector to convert.</param>
/// <returns>The converted and scaled vector.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> ConvertToSingle(Vector<uint> u)
{
var vi = Vector.AsVectorInt32(u);
var v = Vector.ConvertToSingle(vi);
v *= new Vector<float>(1f / 255f);
return v;
}
}
}

144
src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs

@ -1,144 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// ReSharper disable MemberHidesStaticFromOuterClass
namespace SixLabors.ImageSharp;
internal static partial class SimdUtils
{
/// <summary>
/// Fallback implementation based on <see cref="Vector4"/> (128bit).
/// For <see cref="Vector4"/>, efficient software fallback implementations are present,
/// and we hope that even mono's JIT is able to emit SIMD instructions for that type :P
/// </summary>
public static class FallbackIntrinsics128
{
/// <summary>
/// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
/// <param name="source">The source bytes; on return, sliced to the elements that were not converted.</param>
/// <param name="dest">The destination floats; on return, sliced by the same amount as <paramref name="source"/>.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void ByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source,
ref Span<float> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
// NOTE(review): Numerics.Modulo4 is defined elsewhere; presumably source.Length modulo 4 — confirm.
int remainder = Numerics.Modulo4(source.Length);
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
// Convert the leading adjustedCount elements, then advance both spans past them.
ByteToNormalizedFloat(source[..adjustedCount], dest[..adjustedCount]);
source = source[adjustedCount..];
dest = dest[adjustedCount..];
}
}
/// <summary>
/// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
/// <param name="source">The source floats; on return, sliced to the elements that were not converted.</param>
/// <param name="dest">The destination bytes; on return, sliced by the same amount as <paramref name="source"/>.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
// NOTE(review): Numerics.Modulo4 is defined elsewhere; presumably source.Length modulo 4 — confirm.
int remainder = Numerics.Modulo4(source.Length);
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
// Convert the leading adjustedCount elements, then advance both spans past them.
NormalizedFloatToByteSaturate(
source[..adjustedCount],
dest[..adjustedCount]);
source = source[adjustedCount..];
dest = dest[adjustedCount..];
}
}
/// <summary>
/// Implementation of <see cref="SimdUtils.ByteToNormalizedFloat"/> using <see cref="Vector4"/>.
/// </summary>
/// <param name="source">The source bytes.</param>
/// <param name="dest">The destination floats.</param>
[MethodImpl(InliningOptions.ColdPath)]
internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
// NOTE(review): VerifySpanInput is defined elsewhere; presumably validates the span lengths against the factor 4 — confirm.
VerifySpanInput(source, dest, 4);
// Number of 4-element groups to process.
uint count = (uint)dest.Length / 4;
if (count == 0)
{
return;
}
// Reinterpret the spans so that four bytes / four floats are accessed per iteration.
ref ByteVector4 sBase = ref Unsafe.As<byte, ByteVector4>(ref MemoryMarshal.GetReference(source));
ref Vector4 dBase = ref Unsafe.As<float, Vector4>(ref MemoryMarshal.GetReference(dest));
const float scale = 1f / 255f;
Vector4 d = default;
for (nuint i = 0; i < count; i++)
{
ref ByteVector4 s = ref Unsafe.Add(ref sBase, i);
// Load the four bytes into the vector, then scale all components by 1/255 at once.
d.X = s.X;
d.Y = s.Y;
d.Z = s.Z;
d.W = s.W;
d *= scale;
Unsafe.Add(ref dBase, i) = d;
}
}
/// <summary>
/// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/> using <see cref="Vector4"/>.
/// </summary>
/// <param name="source">The source floats.</param>
/// <param name="dest">The destination bytes.</param>
[MethodImpl(InliningOptions.ColdPath)]
internal static void NormalizedFloatToByteSaturate(
ReadOnlySpan<float> source,
Span<byte> dest)
{
// NOTE(review): VerifySpanInput is defined elsewhere; presumably validates the span lengths against the factor 4 — confirm.
VerifySpanInput(source, dest, 4);
// Number of 4-element groups to process.
uint count = (uint)source.Length / 4;
if (count == 0)
{
return;
}
// Reinterpret the spans so that four floats / four bytes are accessed per iteration.
ref Vector4 sBase = ref Unsafe.As<float, Vector4>(ref MemoryMarshal.GetReference(source));
ref ByteVector4 dBase = ref Unsafe.As<byte, ByteVector4>(ref MemoryMarshal.GetReference(dest));
var half = new Vector4(0.5f);
var maxBytes = new Vector4(255f);
for (nuint i = 0; i < count; i++)
{
Vector4 s = Unsafe.Add(ref sBase, i);
// Scale by 255, add 0.5, then clamp to [0, 255] before casting each component to byte.
s *= maxBytes;
s += half;
s = Numerics.Clamp(s, Vector4.Zero, maxBytes);
ref ByteVector4 d = ref Unsafe.Add(ref dBase, i);
d.X = (byte)s.X;
d.Y = (byte)s.Y;
d.Z = (byte)s.Z;
d.W = (byte)s.W;
}
}
/// <summary>
/// Four sequentially laid out bytes, used to access a span of bytes four elements at a time.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
private struct ByteVector4
{
public byte X;
public byte Y;
public byte Z;
public byte W;
}
}
}

335
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -17,8 +17,13 @@ internal static partial class SimdUtils
{
public static class HwIntrinsics
{
#pragma warning disable SA1117 // Parameters should be on same line or separate lines
#pragma warning disable SA1137 // Elements should have the same indentation
[MethodImpl(MethodImplOptions.AggressiveInlining)] // too much IL for JIT to inline, so give a hint
public static Vector256<int> PermuteMaskDeinterleave8x32() => Vector256.Create(0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0).AsInt32();
public static Vector256<int> PermuteMaskDeinterleave8x32() => Vector256.Create(0, 4, 1, 5, 2, 6, 3, 7);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector512<int> PermuteMaskDeinterleave16x32() => Vector512.Create(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<uint> PermuteMaskEvenOdd8x32() => Vector256.Create(0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0).AsUInt32();
@ -38,17 +43,15 @@ internal static partial class SimdUtils
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> ShuffleMaskSlice4Nx16() => Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80);
#pragma warning disable SA1003, SA1116, SA1117 // Parameters should be on same line or separate lines
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<byte> ShuffleMaskShiftAlpha() => Vector256.Create((byte)
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15,
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15);
private static Vector256<byte> ShuffleMaskShiftAlpha() => Vector256.Create(
(byte)0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15,
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<uint> PermuteMaskShiftAlpha8x32() => Vector256.Create(
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0,
5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0).AsUInt32();
#pragma warning restore SA1003, SA1116, SA1117 // Parameters should be on same line or separate lines
public static Vector256<uint> PermuteMaskShiftAlpha8x32() => Vector256.Create(0u, 1, 2, 4, 5, 6, 3, 7);
#pragma warning restore SA1137 // Elements should have the same indentation
#pragma warning restore SA1117 // Parameters should be on same line or separate lines
/// <summary>
/// Shuffle single-precision (32-bit) floating-point elements in <paramref name="source"/>
@ -749,17 +752,23 @@ internal static partial class SimdUtils
/// <summary>
/// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
/// <param name="source">The source buffer.</param>
/// <param name="destination">The destination buffer.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void ByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source,
ref Span<float> dest)
ref Span<float> destination)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");
if (Avx2.IsSupported || Sse2.IsSupported)
if (Vector128.IsHardwareAccelerated)
{
int remainder;
if (Avx2.IsSupported)
if (Vector512.IsHardwareAccelerated && Avx512F.IsSupported)
{
remainder = Numerics.ModuloP2(source.Length, Vector512<byte>.Count);
}
else if (Avx2.IsSupported)
{
remainder = Numerics.ModuloP2(source.Length, Vector256<byte>.Count);
}
@ -772,10 +781,10 @@ internal static partial class SimdUtils
if (adjustedCount > 0)
{
ByteToNormalizedFloat(source[..adjustedCount], dest[..adjustedCount]);
ByteToNormalizedFloat(source[..adjustedCount], destination[..adjustedCount]);
source = source[adjustedCount..];
dest = dest[adjustedCount..];
destination = destination[adjustedCount..];
}
}
}
@ -783,97 +792,126 @@ internal static partial class SimdUtils
/// <summary>
/// Implementation of <see cref="SimdUtils.ByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
/// </summary>
/// <param name="source">The source buffer.</param>
/// <param name="destination">The destination buffer.</param>
/// <remarks>
/// Implementation is based on MagicScaler code:
/// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L80-L182
/// </remarks>
internal static unsafe void ByteToNormalizedFloat(
ReadOnlySpan<byte> source,
Span<float> dest)
Span<float> destination)
{
fixed (byte* sourceBase = source)
if (Vector512.IsHardwareAccelerated && Avx512F.IsSupported)
{
if (Avx2.IsSupported)
{
VerifySpanInput(source, dest, Vector256<byte>.Count);
nuint n = dest.Vector256Count<byte>();
DebugVerifySpanInput(source, destination, Vector512<byte>.Count);
ref Vector256<float> destBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
nuint n = destination.Vector512Count<byte>();
Vector256<float> scale = Vector256.Create(1 / (float)byte.MaxValue);
ref byte sourceBase = ref MemoryMarshal.GetReference(source);
ref Vector512<float> destinationBase = ref Unsafe.As<float, Vector512<float>>(ref MemoryMarshal.GetReference(destination));
for (nuint i = 0; i < n; i++)
{
nuint si = (uint)Vector256<byte>.Count * i;
Vector256<int> i0 = Avx2.ConvertToVector256Int32(sourceBase + si);
Vector256<int> i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256<int>.Count);
Vector256<int> i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 2));
Vector256<int> i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 3));
Vector256<float> f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0));
Vector256<float> f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1));
Vector256<float> f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2));
Vector256<float> f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3));
ref Vector256<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
for (nuint i = 0; i < n; i++)
{
nuint si = (uint)Vector512<byte>.Count * i;
Vector512<int> i0 = Avx512F.ConvertToVector512Int32(Vector128.LoadUnsafe(ref sourceBase, si));
Vector512<int> i1 = Avx512F.ConvertToVector512Int32(Vector128.LoadUnsafe(ref sourceBase, si + (nuint)Vector512<int>.Count));
Vector512<int> i2 = Avx512F.ConvertToVector512Int32(Vector128.LoadUnsafe(ref sourceBase, si + (nuint)(Vector512<int>.Count * 2)));
Vector512<int> i3 = Avx512F.ConvertToVector512Int32(Vector128.LoadUnsafe(ref sourceBase, si + (nuint)(Vector512<int>.Count * 3)));
// Declare multiplier on each line. Codegen is better.
Vector512<float> f0 = Vector512.Create(1 / (float)byte.MaxValue) * Avx512F.ConvertToVector512Single(i0);
Vector512<float> f1 = Vector512.Create(1 / (float)byte.MaxValue) * Avx512F.ConvertToVector512Single(i1);
Vector512<float> f2 = Vector512.Create(1 / (float)byte.MaxValue) * Avx512F.ConvertToVector512Single(i2);
Vector512<float> f3 = Vector512.Create(1 / (float)byte.MaxValue) * Avx512F.ConvertToVector512Single(i3);
ref Vector512<float> d = ref Unsafe.Add(ref destinationBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
else
}
else if (Avx2.IsSupported)
{
DebugVerifySpanInput(source, destination, Vector256<byte>.Count);
nuint n = destination.Vector256Count<byte>();
ref byte sourceBase = ref MemoryMarshal.GetReference(source);
ref Vector256<float> destinationBase = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
for (nuint i = 0; i < n; i++)
{
// Sse
VerifySpanInput(source, dest, Vector128<byte>.Count);
nuint si = (uint)Vector256<byte>.Count * i;
Vector256<int> i0 = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sourceBase, si));
Vector256<int> i1 = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sourceBase, si + (nuint)Vector256<int>.Count));
Vector256<int> i2 = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sourceBase, si + (nuint)(Vector256<int>.Count * 2)));
// Ensure overreads past 16 byte boundary do not happen in debug due to lack of containment.
ref ulong refULong = ref Unsafe.As<byte, ulong>(ref Unsafe.Add(ref sourceBase, si));
Vector256<int> i3 = Avx2.ConvertToVector256Int32(Vector128.CreateScalarUnsafe(Unsafe.Add(ref refULong, 3)).AsByte());
// Declare multiplier on each line. Codegen is better.
Vector256<float> f0 = Vector256.Create(1 / (float)byte.MaxValue) * Avx.ConvertToVector256Single(i0);
Vector256<float> f1 = Vector256.Create(1 / (float)byte.MaxValue) * Avx.ConvertToVector256Single(i1);
Vector256<float> f2 = Vector256.Create(1 / (float)byte.MaxValue) * Avx.ConvertToVector256Single(i2);
Vector256<float> f3 = Vector256.Create(1 / (float)byte.MaxValue) * Avx.ConvertToVector256Single(i3);
ref Vector256<float> d = ref Unsafe.Add(ref destinationBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
}
else if (Vector128.IsHardwareAccelerated)
{
DebugVerifySpanInput(source, destination, Vector128<byte>.Count);
nuint n = dest.Vector128Count<byte>();
nuint n = destination.Vector128Count<byte>();
ref byte sourceBase = ref MemoryMarshal.GetReference(source);
ref Vector128<float> destinationBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(destination));
ref Vector128<float> destBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
Vector128<float> scale = Vector128.Create(1 / (float)byte.MaxValue);
for (nuint i = 0; i < n; i++)
{
nuint si = (uint)Vector128<byte>.Count * i;
Vector128<float> scale = Vector128.Create(1 / (float)byte.MaxValue);
Vector128<byte> zero = Vector128<byte>.Zero;
Vector128<int> i0, i1, i2, i3;
if (Sse41.IsSupported)
{
ref int refInt = ref Unsafe.As<byte, int>(ref Unsafe.Add(ref sourceBase, si));
for (nuint i = 0; i < n; i++)
i0 = Sse41.ConvertToVector128Int32(Vector128.CreateScalarUnsafe(refInt).AsByte());
i1 = Sse41.ConvertToVector128Int32(Vector128.CreateScalarUnsafe(Unsafe.Add(ref refInt, 1)).AsByte());
i2 = Sse41.ConvertToVector128Int32(Vector128.CreateScalarUnsafe(Unsafe.Add(ref refInt, 2)).AsByte());
i3 = Sse41.ConvertToVector128Int32(Vector128.CreateScalarUnsafe(Unsafe.Add(ref refInt, 3)).AsByte());
}
else
{
nuint si = (uint)Vector128<byte>.Count * i;
Vector128<int> i0, i1, i2, i3;
if (Sse41.IsSupported)
{
i0 = Sse41.ConvertToVector128Int32(sourceBase + si);
i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128<int>.Count);
i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 2));
i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 3));
}
else
{
Vector128<byte> b = Sse2.LoadVector128(sourceBase + si);
Vector128<short> s0 = Sse2.UnpackLow(b, zero).AsInt16();
Vector128<short> s1 = Sse2.UnpackHigh(b, zero).AsInt16();
i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32();
i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32();
i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32();
i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32();
}
Vector128<float> f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0));
Vector128<float> f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1));
Vector128<float> f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2));
Vector128<float> f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3));
ref Vector128<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
// Sse2, AdvSimd, etc
Vector128<byte> b = Vector128.LoadUnsafe(ref sourceBase, si);
(Vector128<ushort> s0, Vector128<ushort> s1) = Vector128.Widen(b);
(i0, i1) = Vector128.Widen(s0.AsInt16());
(i2, i3) = Vector128.Widen(s1.AsInt16());
}
Vector128<float> f0 = scale * Vector128.ConvertToSingle(i0);
Vector128<float> f1 = scale * Vector128.ConvertToSingle(i1);
Vector128<float> f2 = scale * Vector128.ConvertToSingle(i2);
Vector128<float> f3 = scale * Vector128.ConvertToSingle(i3);
ref Vector128<float> d = ref Unsafe.Add(ref destinationBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
}
}
@ -881,17 +919,24 @@ internal static partial class SimdUtils
/// <summary>
/// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
/// <param name="source">The source buffer.</param>
/// <param name="destination">The destination buffer.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
ref Span<byte> destination)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");
if (Avx2.IsSupported || Sse2.IsSupported)
if (Sse2.IsSupported || AdvSimd.IsSupported)
{
int remainder;
if (Avx2.IsSupported)
if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported)
{
remainder = Numerics.ModuloP2(source.Length, Vector512<byte>.Count);
}
else if (Avx2.IsSupported)
{
remainder = Numerics.ModuloP2(source.Length, Vector256<byte>.Count);
}
@ -906,10 +951,10 @@ internal static partial class SimdUtils
{
NormalizedFloatToByteSaturate(
source[..adjustedCount],
dest[..adjustedCount]);
destination[..adjustedCount]);
source = source[adjustedCount..];
dest = dest[adjustedCount..];
destination = destination[adjustedCount..];
}
}
}
@ -917,25 +962,58 @@ internal static partial class SimdUtils
/// <summary>
/// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
/// </summary>
/// <param name="source">The source buffer.</param>
/// <param name="destination">The destination buffer.</param>
/// <remarks>
/// Implementation is based on MagicScaler code:
/// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L541-L622
/// </remarks>
internal static void NormalizedFloatToByteSaturate(
ReadOnlySpan<float> source,
Span<byte> dest)
Span<byte> destination)
{
if (Avx2.IsSupported)
if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported)
{
VerifySpanInput(source, dest, Vector256<byte>.Count);
DebugVerifySpanInput(source, destination, Vector512<byte>.Count);
nuint n = dest.Vector256Count<byte>();
nuint n = destination.Vector512Count<byte>();
ref Vector256<float> sourceBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
ref Vector512<float> sourceBase = ref Unsafe.As<float, Vector512<float>>(ref MemoryMarshal.GetReference(source));
ref Vector512<byte> destinationBase = ref Unsafe.As<byte, Vector512<byte>>(ref MemoryMarshal.GetReference(destination));
ref Vector256<byte> destBase =
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
Vector512<float> scale = Vector512.Create((float)byte.MaxValue);
Vector512<int> mask = PermuteMaskDeinterleave16x32();
for (nuint i = 0; i < n; i++)
{
ref Vector512<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector512<float> f0 = scale * s;
Vector512<float> f1 = scale * Unsafe.Add(ref s, 1);
Vector512<float> f2 = scale * Unsafe.Add(ref s, 2);
Vector512<float> f3 = scale * Unsafe.Add(ref s, 3);
Vector512<int> w0 = Vector512Utilities.ConvertToInt32RoundToEven(f0);
Vector512<int> w1 = Vector512Utilities.ConvertToInt32RoundToEven(f1);
Vector512<int> w2 = Vector512Utilities.ConvertToInt32RoundToEven(f2);
Vector512<int> w3 = Vector512Utilities.ConvertToInt32RoundToEven(f3);
Vector512<short> u0 = Avx512BW.PackSignedSaturate(w0, w1);
Vector512<short> u1 = Avx512BW.PackSignedSaturate(w2, w3);
Vector512<byte> b = Avx512BW.PackUnsignedSaturate(u0, u1);
b = Avx512F.PermuteVar16x32(b.AsInt32(), mask).AsByte();
Unsafe.Add(ref destinationBase, i) = b;
}
}
else if (Avx2.IsSupported)
{
DebugVerifySpanInput(source, destination, Vector256<byte>.Count);
nuint n = destination.Vector256Count<byte>();
ref Vector256<float> sourceBase = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
ref Vector256<byte> destinationBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(destination));
Vector256<float> scale = Vector256.Create((float)byte.MaxValue);
Vector256<int> mask = PermuteMaskDeinterleave8x32();
@ -944,36 +1022,33 @@ internal static partial class SimdUtils
{
ref Vector256<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector256<float> f0 = Avx.Multiply(scale, s);
Vector256<float> f1 = Avx.Multiply(scale, Unsafe.Add(ref s, 1));
Vector256<float> f2 = Avx.Multiply(scale, Unsafe.Add(ref s, 2));
Vector256<float> f3 = Avx.Multiply(scale, Unsafe.Add(ref s, 3));
Vector256<float> f0 = scale * s;
Vector256<float> f1 = scale * Unsafe.Add(ref s, 1);
Vector256<float> f2 = scale * Unsafe.Add(ref s, 2);
Vector256<float> f3 = scale * Unsafe.Add(ref s, 3);
Vector256<int> w0 = Avx.ConvertToVector256Int32(f0);
Vector256<int> w1 = Avx.ConvertToVector256Int32(f1);
Vector256<int> w2 = Avx.ConvertToVector256Int32(f2);
Vector256<int> w3 = Avx.ConvertToVector256Int32(f3);
Vector256<int> w0 = Vector256Utilities.ConvertToInt32RoundToEven(f0);
Vector256<int> w1 = Vector256Utilities.ConvertToInt32RoundToEven(f1);
Vector256<int> w2 = Vector256Utilities.ConvertToInt32RoundToEven(f2);
Vector256<int> w3 = Vector256Utilities.ConvertToInt32RoundToEven(f3);
Vector256<short> u0 = Avx2.PackSignedSaturate(w0, w1);
Vector256<short> u1 = Avx2.PackSignedSaturate(w2, w3);
Vector256<byte> b = Avx2.PackUnsignedSaturate(u0, u1);
b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte();
Unsafe.Add(ref destBase, i) = b;
Unsafe.Add(ref destinationBase, i) = b;
}
}
else
else if (Sse2.IsSupported || AdvSimd.IsSupported)
{
// Sse
VerifySpanInput(source, dest, Vector128<byte>.Count);
// Sse, AdvSimd
DebugVerifySpanInput(source, destination, Vector128<byte>.Count);
nuint n = dest.Vector128Count<byte>();
nuint n = destination.Vector128Count<byte>();
ref Vector128<float> sourceBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
ref Vector128<float> sourceBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
ref Vector128<byte> destinationBase = ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(destination));
Vector128<float> scale = Vector128.Create((float)byte.MaxValue);
@ -981,20 +1056,20 @@ internal static partial class SimdUtils
{
ref Vector128<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector128<float> f0 = Sse.Multiply(scale, s);
Vector128<float> f1 = Sse.Multiply(scale, Unsafe.Add(ref s, 1));
Vector128<float> f2 = Sse.Multiply(scale, Unsafe.Add(ref s, 2));
Vector128<float> f3 = Sse.Multiply(scale, Unsafe.Add(ref s, 3));
Vector128<float> f0 = scale * s;
Vector128<float> f1 = scale * Unsafe.Add(ref s, 1);
Vector128<float> f2 = scale * Unsafe.Add(ref s, 2);
Vector128<float> f3 = scale * Unsafe.Add(ref s, 3);
Vector128<int> w0 = Sse2.ConvertToVector128Int32(f0);
Vector128<int> w1 = Sse2.ConvertToVector128Int32(f1);
Vector128<int> w2 = Sse2.ConvertToVector128Int32(f2);
Vector128<int> w3 = Sse2.ConvertToVector128Int32(f3);
Vector128<int> w0 = Vector128Utilities.ConvertToInt32RoundToEven(f0);
Vector128<int> w1 = Vector128Utilities.ConvertToInt32RoundToEven(f1);
Vector128<int> w2 = Vector128Utilities.ConvertToInt32RoundToEven(f2);
Vector128<int> w3 = Vector128Utilities.ConvertToInt32RoundToEven(f3);
Vector128<short> u0 = Sse2.PackSignedSaturate(w0, w1);
Vector128<short> u1 = Sse2.PackSignedSaturate(w2, w3);
Vector128<short> u0 = Vector128Utilities.PackSignedSaturate(w0, w1);
Vector128<short> u1 = Vector128Utilities.PackSignedSaturate(w2, w3);
Unsafe.Add(ref destBase, i) = Sse2.PackUnsignedSaturate(u0, u1);
Unsafe.Add(ref destinationBase, i) = Vector128Utilities.PackUnsignedSaturate(u0, u1);
}
}
}

118
src/ImageSharp/Common/Helpers/SimdUtils.cs

@ -22,13 +22,6 @@ internal static partial class SimdUtils
public static bool HasVector8 { get; } =
Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8;
/// <summary>
/// Gets a value indicating whether <see cref="Vector{T}"/> is hardware accelerated and holds
/// exactly 4 <see cref="float"/> values, i.e. the registers are 128 bit (not byte) wide.
/// </summary>
public static bool HasVector4 { get; } =
    Vector.IsHardwareAccelerated && Vector<float>.Count == 4;
/// <summary>
/// Transform all scalars in 'v' in a way that converting them to <see cref="int"/> would have rounding semantics.
/// </summary>
@ -69,111 +62,8 @@ internal static partial class SimdUtils
}
}
/// <summary>
/// Converts every <see cref="byte"/> in <paramref name="source"/> to a <see cref="float"/> normalized into [0..1].
/// <paramref name="source"/> should be of the same size as <paramref name="dest"/>,
/// but there are no restrictions on the span's length.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of floats.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    // Both spans are passed by reference so each stage can slice off what it converted.
    HwIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest);

    // Picks up whatever the hardware stage left behind.
    FallbackIntrinsics128.ByteToNormalizedFloatReduce(ref source, ref dest);

    if (source.IsEmpty)
    {
        return;
    }

    // Scalar tail for the last few elements.
    ConvertByteToNormalizedFloatRemainder(source, dest);
}
/// <summary>
/// Converts all <see cref="float"/> values normalized into [0..1] from 'source' into the 'dest' buffer of <see cref="byte"/>.
/// The values are scaled up into [0-255] and rounded, overflows are clamped.
/// <paramref name="source"/> should be of the same size as <paramref name="dest"/>,
/// but there are no restrictions on the span's length.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of bytes.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturate(ReadOnlySpan<float> source, Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    // Both spans are passed by reference so each stage can slice off what it converted.
    HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest);

    // Picks up whatever the hardware stage left behind.
    FallbackIntrinsics128.NormalizedFloatToByteSaturateReduce(ref source, ref dest);

    if (source.IsEmpty)
    {
        return;
    }

    // Scalar tail for the last few elements.
    ConvertNormalizedFloatToByteRemainder(source, dest);
}
/// <summary>
/// Scalar conversion of the last 1 to 3 bytes of <paramref name="source"/> into floats (value / 255).
/// Any other length is a no-op.
/// </summary>
/// <param name="source">The remaining source bytes.</param>
/// <param name="dest">The remaining destination floats.</param>
[MethodImpl(InliningOptions.ColdPath)]
private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan<byte> source, Span<float> dest)
{
    ref byte src = ref MemoryMarshal.GetReference(source);
    ref float dst = ref MemoryMarshal.GetReference(dest);

    // Deliberately unrolled: at most 3 elements are expected here, so no loop is used.
    // Elements are written from the highest index down to index 0.
    int remaining = source.Length;

    if (remaining == 3)
    {
        Unsafe.Add(ref dst, 2) = Unsafe.Add(ref src, 2) / 255f;
        remaining = 2;
    }

    if (remaining == 2)
    {
        Unsafe.Add(ref dst, 1) = Unsafe.Add(ref src, 1) / 255f;
        remaining = 1;
    }

    if (remaining == 1)
    {
        dst = src / 255f;
    }
}
/// <summary>
/// Scalar conversion of the last 1 to 3 floats of <paramref name="source"/> into bytes
/// using <see cref="ConvertToByte"/>. Any other length is a no-op.
/// </summary>
/// <param name="source">The remaining source floats.</param>
/// <param name="dest">The remaining destination bytes.</param>
[MethodImpl(InliningOptions.ColdPath)]
private static void ConvertNormalizedFloatToByteRemainder(ReadOnlySpan<float> source, Span<byte> dest)
{
    ref float src = ref MemoryMarshal.GetReference(source);
    ref byte dst = ref MemoryMarshal.GetReference(dest);

    // Unrolled on purpose; elements are written from the highest index down to index 0.
    int remaining = source.Length;

    if (remaining == 3)
    {
        Unsafe.Add(ref dst, 2) = ConvertToByte(Unsafe.Add(ref src, 2));
        remaining = 2;
    }

    if (remaining == 2)
    {
        Unsafe.Add(ref dst, 1) = ConvertToByte(Unsafe.Add(ref src, 1));
        remaining = 1;
    }

    if (remaining == 1)
    {
        dst = ConvertToByte(src);
    }
}
/// <summary>
/// Scales <paramref name="f"/> by 255, adds 0.5, clamps the result to [0, 255] and casts it to <see cref="byte"/>.
/// </summary>
/// <param name="f">The value to convert.</param>
/// <returns>The converted <see cref="byte"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static byte ConvertToByte(float f)
{
    // Adding 0.5 before the truncating cast rounds to the nearest integer.
    float scaled = (f * 255F) + 0.5F;
    return (byte)Numerics.Clamp(scaled, 0, 255F);
}
/// <summary>
/// Debug-only check that throws when <see cref="HasVector8"/> is <see langword="false"/>.
/// </summary>
/// <param name="operation">The name of the operation, used in the exception message.</param>
/// <exception cref="NotSupportedException">Thrown when <see cref="HasVector8"/> is <see langword="false"/>.</exception>
[Conditional("DEBUG")]
private static void VerifyHasVector8(string operation)
{
    if (HasVector8)
    {
        return;
    }

    throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
}
[Conditional("DEBUG")]
private static void VerifySpanInput(ReadOnlySpan<byte> source, Span<float> dest, int shouldBeDivisibleBy)
private static void DebugVerifySpanInput(ReadOnlySpan<byte> source, Span<float> dest, int shouldBeDivisibleBy)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
DebugGuard.IsTrue(
@ -183,11 +73,11 @@ internal static partial class SimdUtils
}
[Conditional("DEBUG")]
private static void VerifySpanInput(ReadOnlySpan<float> source, Span<byte> dest, int shouldBeDivisibleBy)
private static void DebugVerifySpanInput(ReadOnlySpan<float> source, Span<byte> destination, int shouldBeDivisibleBy)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");
DebugGuard.IsTrue(
Numerics.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0,
Numerics.ModuloP2(destination.Length, shouldBeDivisibleBy) == 0,
nameof(source),
$"length should be divisible by {shouldBeDivisibleBy}!");
}

74
src/ImageSharp/Common/Helpers/Vector128Utilities.cs

@ -62,6 +62,7 @@ internal static class Vector128Utilities
/// <param name="vector">The input vector from which values are selected.</param>
/// <param name="control">The shuffle control byte.</param>
/// <returns>The <see cref="Vector128{Single}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> Shuffle(Vector128<float> vector, [ConstantExpected] byte control)
{
if (Sse.IsSupported)
@ -84,6 +85,7 @@ internal static class Vector128Utilities
/// <returns>
/// A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> Shuffle(Vector128<byte> vector, Vector128<byte> indices)
{
if (Ssse3.IsSupported)
@ -155,6 +157,7 @@ internal static class Vector128Utilities
/// <param name="right">The right hand source vector.</param>
/// <param name="mask">An 8-bit mask used for the operation.</param>
/// <returns>The <see cref="Vector128{Byte}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> AlignRight(Vector128<byte> left, Vector128<byte> right, [ConstantExpected(Max = (byte)15)] byte mask)
{
if (Ssse3.IsSupported)
@ -171,6 +174,77 @@ internal static class Vector128Utilities
return default;
}
/// <summary>
/// Converts a 128-bit vector of 4 single-precision floating-point values to a 128-bit vector of 4 signed 32-bit integers.
/// Rounding is equivalent to <see cref="MidpointRounding.ToEven"/>.
/// </summary>
/// <param name="vector">The value to convert.</param>
/// <returns>The <see cref="Vector128{Int32}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> ConvertToInt32RoundToEven(Vector128<float> vector)
{
    if (Sse2.IsSupported)
    {
        return Sse2.ConvertToVector128Int32(vector);
    }

    if (AdvSimd.IsSupported)
    {
        return AdvSimd.ConvertToInt32RoundToEven(vector);
    }

    // Portable fallback: add and subtract 2^23 (8388608) carrying the sign of each input lane,
    // then re-apply the sign bit before the conversion to integer.
    Vector128<float> signBits = vector & Vector128.Create(-0.0f);
    Vector128<float> magic = signBits | Vector128.Create(8388608.0f);
    Vector128<float> rounded = (vector + magic) - magic;

    return Vector128.ConvertToInt32(rounded | signBits);
}
/// <summary>
/// Packs signed 16-bit integers to unsigned 8-bit integers and saturates.
/// </summary>
/// <param name="left">The left hand source vector.</param>
/// <param name="right">The right hand source vector.</param>
/// <returns>The <see cref="Vector128{Byte}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> PackUnsignedSaturate(Vector128<short> left, Vector128<short> right)
{
    if (Sse2.IsSupported)
    {
        return Sse2.PackUnsignedSaturate(left, right);
    }

    if (AdvSimd.IsSupported)
    {
        // Narrow 'left' into the lower half, then narrow 'right' into the upper half.
        return AdvSimd.ExtractNarrowingSaturateUnsignedUpper(AdvSimd.ExtractNarrowingSaturateUnsignedLower(left), right);
    }

    // Neither instruction set is available; there is no software fallback.
    ThrowUnreachableException();
    return default;
}
/// <summary>
/// Packs signed 32-bit integers to signed 16-bit integers and saturates.
/// </summary>
/// <param name="left">The left hand source vector.</param>
/// <param name="right">The right hand source vector.</param>
/// <returns>The <see cref="Vector128{Int16}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> PackSignedSaturate(Vector128<int> left, Vector128<int> right)
{
    if (Sse2.IsSupported)
    {
        return Sse2.PackSignedSaturate(left, right);
    }

    if (AdvSimd.IsSupported)
    {
        // Narrow 'left' into the lower half, then narrow 'right' into the upper half.
        Vector64<short> lowerHalf = AdvSimd.ExtractNarrowingSaturateLower(left);
        return AdvSimd.ExtractNarrowingSaturateUpper(lowerHalf, right);
    }

    // Neither instruction set is available; there is no software fallback.
    ThrowUnreachableException();
    return default;
}
/// <summary>
/// Throws an <see cref="UnreachableException"/>. Kept as a separate method so the throw stays out of the callers' bodies.
/// </summary>
[DoesNotReturn]
private static void ThrowUnreachableException()
{
    throw new UnreachableException();
}
}

39
src/ImageSharp/Common/Helpers/Vector256Utilities.cs

@ -25,7 +25,7 @@ internal static class Vector256Utilities
public static bool SupportsShuffleFloat
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get => Avx.IsSupported;
get => Avx.IsSupported || Sse.IsSupported;
}
/// <summary>
@ -43,6 +43,7 @@ internal static class Vector256Utilities
/// <param name="vector">The input vector from which values are selected.</param>
/// <param name="control">The shuffle control byte.</param>
/// <returns>The <see cref="Vector256{Single}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<float> Shuffle(Vector256<float> vector, [ConstantExpected] byte control)
{
if (Avx.IsSupported)
@ -50,6 +51,13 @@ internal static class Vector256Utilities
return Avx.Shuffle(vector, vector, control);
}
if (Sse.IsSupported)
{
Vector128<float> lower = vector.GetLower();
Vector128<float> upper = vector.GetUpper();
return Vector256.Create(Sse.Shuffle(lower, lower, control), Sse.Shuffle(upper, upper, control));
}
ThrowUnreachableException();
return default;
}
@ -62,6 +70,7 @@ internal static class Vector256Utilities
/// The per-element indices used to select a value from <paramref name="vector" />.
/// </param>
/// <returns>The <see cref="Vector256{Byte}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<byte> Shuffle(Vector256<byte> vector, Vector256<byte> indices)
{
if (Avx2.IsSupported)
@ -73,6 +82,34 @@ internal static class Vector256Utilities
return default;
}
/// <summary>
/// Converts a 256-bit vector of 8 single-precision floating-point values to a 256-bit vector of 8 signed 32-bit integers.
/// Rounding is equivalent to <see cref="MidpointRounding.ToEven"/>.
/// </summary>
/// <param name="vector">The value to convert.</param>
/// <returns>The <see cref="Vector256{Int32}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<int> ConvertToInt32RoundToEven(Vector256<float> vector)
{
    if (Avx.IsSupported)
    {
        return Avx.ConvertToVector256Int32(vector);
    }

    if (Sse2.IsSupported)
    {
        // Convert each 128-bit half separately and stitch the halves back together.
        return Vector256.Create(
            Sse2.ConvertToVector128Int32(vector.GetLower()),
            Sse2.ConvertToVector128Int32(vector.GetUpper()));
    }

    // Portable fallback: add and subtract 2^23 (8388608) carrying the sign of each input lane,
    // then re-apply the sign bit before the conversion to integer.
    Vector256<float> signBits = vector & Vector256.Create(-0.0f);
    Vector256<float> magic = signBits | Vector256.Create(8388608.0f);
    Vector256<float> rounded = (vector + magic) - magic;

    return Vector256.ConvertToInt32(rounded | signBits);
}
/// <summary>
/// Throws an <see cref="UnreachableException"/>. Kept as a separate method so the throw stays out of the callers' bodies.
/// </summary>
[DoesNotReturn]
private static void ThrowUnreachableException()
{
    throw new UnreachableException();
}
}

37
src/ImageSharp/Common/Helpers/Vector512Utilities.cs

@ -25,7 +25,7 @@ internal static class Vector512Utilities
public static bool SupportsShuffleFloat
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get => Avx512F.IsSupported;
get => Avx512F.IsSupported || Avx.IsSupported;
}
/// <summary>
@ -51,6 +51,13 @@ internal static class Vector512Utilities
return Avx512F.Shuffle(vector, vector, control);
}
if (Avx.IsSupported)
{
Vector256<float> lower = vector.GetLower();
Vector256<float> upper = vector.GetUpper();
return Vector512.Create(Avx.Shuffle(lower, lower, control), Avx.Shuffle(upper, upper, control));
}
ThrowUnreachableException();
return default;
}
@ -75,6 +82,34 @@ internal static class Vector512Utilities
return default;
}
/// <summary>
/// Performs a conversion from a 512-bit vector of 16 single-precision floating-point values to a 512-bit vector of 16 signed 32-bit integer values.
/// Rounding is equivalent to <see cref="MidpointRounding.ToEven"/>.
/// </summary>
/// <param name="vector">The value to convert.</param>
/// <returns>The <see cref="Vector512{Int32}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector512<int> ConvertToInt32RoundToEven(Vector512<float> vector)
{
    if (Avx512F.IsSupported)
    {
        return Avx512F.ConvertToVector512Int32(vector);
    }

    if (Avx.IsSupported)
    {
        // Convert each 256-bit half separately and stitch the halves back together.
        Vector256<int> lower = Avx.ConvertToVector256Int32(vector.GetLower());
        Vector256<int> upper = Avx.ConvertToVector256Int32(vector.GetUpper());
        return Vector512.Create(lower, upper);
    }

    // Portable fallback: add and subtract 2^23 (8388608) carrying the sign of each input lane,
    // then re-apply the sign bit before the conversion to integer.
    Vector512<float> sign = vector & Vector512.Create(-0.0f);
    Vector512<float> val_2p23_f32 = sign | Vector512.Create(8388608.0f);

    val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32;
    return Vector512.ConvertToInt32(val_2p23_f32 | sign);
}
/// <summary>
/// Throws an <see cref="UnreachableException"/>. Kept as a separate method so the throw stays out of the callers' bodies.
/// </summary>
[DoesNotReturn]
private static void ThrowUnreachableException()
{
    throw new UnreachableException();
}
}

4
src/ImageSharp/Formats/Gif/GifFrameMetadata.cs

@ -82,13 +82,13 @@ public class GifFrameMetadata : IDeepCloneable
{
// TODO: v4 How do I link the parent metadata to the frame metadata to get the global color table?
int index = -1;
float background = 1f;
const float background = 1f;
if (metadata.ColorTable.HasValue)
{
ReadOnlySpan<Color> colorTable = metadata.ColorTable.Value.Span;
for (int i = 0; i < colorTable.Length; i++)
{
Vector4 vector = (Vector4)colorTable[i];
Vector4 vector = colorTable[i].ToScaledVector4();
if (vector.W < background)
{
index = i;

42
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

@ -386,29 +386,33 @@ internal partial struct Block8x8F : IEquatable<Block8x8F>
public void LoadFromInt16ExtendedAvx2(ref Block8x8 source)
{
DebugGuard.IsTrue(
SimdUtils.HasVector8,
Avx2.IsSupported,
"LoadFromUInt16ExtendedAvx2 only works on AVX2 compatible architecture!");
ref Vector<short> sRef = ref Unsafe.As<Block8x8, Vector<short>>(ref source);
ref Vector<float> dRef = ref Unsafe.As<Block8x8F, Vector<float>>(ref this);
ref short sRef = ref Unsafe.As<Block8x8, short>(ref source);
ref Vector256<float> dRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref this);
// Vector<ushort>.Count == 16 on AVX2
// Vector256<ushort>.Count == 16 on AVX2
// We can process 2 block rows in a single step
SimdUtils.ExtendedIntrinsics.ConvertToSingle(sRef, out Vector<float> top, out Vector<float> bottom);
dRef = top;
Unsafe.Add(ref dRef, 1) = bottom;
SimdUtils.ExtendedIntrinsics.ConvertToSingle(Unsafe.Add(ref sRef, 1), out top, out bottom);
Unsafe.Add(ref dRef, 2) = top;
Unsafe.Add(ref dRef, 3) = bottom;
SimdUtils.ExtendedIntrinsics.ConvertToSingle(Unsafe.Add(ref sRef, 2), out top, out bottom);
Unsafe.Add(ref dRef, 4) = top;
Unsafe.Add(ref dRef, 5) = bottom;
SimdUtils.ExtendedIntrinsics.ConvertToSingle(Unsafe.Add(ref sRef, 3), out top, out bottom);
Unsafe.Add(ref dRef, 6) = top;
Unsafe.Add(ref dRef, 7) = bottom;
Vector256<int> top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef));
Vector256<int> bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)Vector256<int>.Count));
dRef = Avx.ConvertToVector256Single(top);
Unsafe.Add(ref dRef, 1) = Avx.ConvertToVector256Single(bottom);
top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 2)));
bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 3)));
Unsafe.Add(ref dRef, 2) = Avx.ConvertToVector256Single(top);
Unsafe.Add(ref dRef, 3) = Avx.ConvertToVector256Single(bottom);
top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 4)));
bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 5)));
Unsafe.Add(ref dRef, 4) = Avx.ConvertToVector256Single(top);
Unsafe.Add(ref dRef, 5) = Avx.ConvertToVector256Single(bottom);
top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 6)));
bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 7)));
Unsafe.Add(ref dRef, 6) = Avx.ConvertToVector256Single(top);
Unsafe.Add(ref dRef, 7) = Avx.ConvertToVector256Single(bottom);
}
/// <summary>

32
src/ImageSharp/Formats/Jpeg/JpegComData.cs

@ -0,0 +1,32 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
namespace SixLabors.ImageSharp.Formats.Jpeg;
/// <summary>
/// Holds the text of a single JPEG comment.
/// </summary>
public readonly struct JpegComData
{
    /// <summary>
    /// Initializes a new instance of the <see cref="JpegComData"/> struct.
    /// </summary>
    /// <param name="value">The characters that make up the comment.</param>
    public JpegComData(ReadOnlyMemory<char> value)
    {
        this.Value = value;
    }

    /// <summary>
    /// Gets the comment characters exactly as they were supplied to the constructor.
    /// </summary>
    public ReadOnlyMemory<char> Value { get; }

    /// <summary>
    /// Creates a <see cref="JpegComData"/> that wraps the characters of the given string.
    /// </summary>
    /// <param name="value">The comment string.</param>
    /// <returns>The <see cref="JpegComData"/> wrapping <paramref name="value"/>.</returns>
    public static JpegComData FromString(string value)
    {
        ReadOnlyMemory<char> characters = value.AsMemory();
        return new JpegComData(characters);
    }

    /// <inheritdoc/>
    public override string ToString()
    {
        ReadOnlyMemory<char> characters = this.Value;
        return characters.ToString();
    }
}

23
src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs

@ -480,9 +480,11 @@ internal sealed class JpegDecoderCore : IRawJpegData, IImageDecoderInternals
break;
case JpegConstants.Markers.APP15:
case JpegConstants.Markers.COM:
stream.Skip(markerContentByteSize);
break;
case JpegConstants.Markers.COM:
this.ProcessComMarker(stream, markerContentByteSize);
break;
case JpegConstants.Markers.DAC:
if (metadataOnly)
@ -515,6 +517,25 @@ internal sealed class JpegDecoderCore : IRawJpegData, IImageDecoderInternals
this.scanDecoder = null;
}
/// <summary>
/// Reads the content of a COM marker segment and adds it to the JPEG metadata comments.
/// </summary>
/// <remarks>
/// Each byte of the segment becomes one character of the comment. If the stream ends
/// before <paramref name="markerContentByteSize"/> bytes were read, only the bytes that
/// were actually read are kept instead of padding the comment with invalid characters.
/// </remarks>
/// <param name="stream">The input stream.</param>
/// <param name="markerContentByteSize">The remaining bytes in the segment block.</param>
private void ProcessComMarker(BufferedReadStream stream, int markerContentByteSize)
{
    char[] chars = new char[markerContentByteSize];
    JpegMetadata metadata = this.Metadata.GetFormatMetadata(JpegFormat.Instance);

    int count = 0;
    while (count < chars.Length)
    {
        int read = stream.ReadByte();

        // NOTE(review): assumes the usual Stream.ReadByte contract where a negative value
        // signals end of stream. Casting that to char would store U+FFFF in the comment.
        if (read < 0)
        {
            break;
        }

        chars[count++] = (char)read;
    }

    metadata.Comments.Add(new JpegComData(new ReadOnlyMemory<char>(chars, 0, count)));
}
/// <summary>
/// Returns encoded colorspace based on the adobe APP14 marker.
/// </summary>

52
src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs

@ -2,6 +2,7 @@
// Licensed under the Six Labors Split License.
#nullable disable
using System.Buffers;
using System.Buffers.Binary;
using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
@ -25,6 +26,9 @@ internal sealed unsafe partial class JpegEncoderCore : IImageEncoderInternals
/// </summary>
private static readonly JpegFrameConfig[] FrameConfigs = CreateFrameConfigs();
/// <summary>
/// The current calling encoder.
/// </summary>
private readonly JpegEncoder encoder;
/// <summary>
@ -89,6 +93,9 @@ internal sealed unsafe partial class JpegEncoderCore : IImageEncoderInternals
// Write Exif, XMP, ICC and IPTC profiles
this.WriteProfiles(metadata, buffer);
// Write comments
this.WriteComments(image.Configuration, jpegMetadata);
// Write the image dimensions.
this.WriteStartOfFrame(image.Width, image.Height, frameConfig, buffer);
@ -167,6 +174,51 @@ internal sealed unsafe partial class JpegEncoderCore : IImageEncoderInternals
this.outputStream.Write(buffer, 0, 18);
}
/// <summary>
/// Writes every non-empty comment held by the metadata as one or more COM segments.
/// </summary>
/// <param name="configuration">The configuration supplying the memory allocator for the scratch buffer.</param>
/// <param name="metadata">The JPEG metadata whose comments are written.</param>
private void WriteComments(Configuration configuration, JpegMetadata metadata)
{
    if (metadata.Comments.Count == 0)
    {
        return;
    }

    // Largest payload written per segment; the header below is given this value plus 2.
    // NOTE(review): presumably 65535 (16-bit segment length) minus the 2 length bytes - confirm.
    const int maxCommentLength = 65533;
    using IMemoryOwner<byte> bufferOwner = configuration.MemoryAllocator.Allocate<byte>(maxCommentLength);
    Span<byte> buffer = bufferOwner.Memory.Span;

    foreach (JpegComData comment in metadata.Comments)
    {
        // An empty comment never enters the loop, so nothing is written for it.
        int remaining = comment.Value.Length;
        while (remaining > 0)
        {
            // Comments longer than the maximum are emitted as several consecutive segments.
            int chunkLength = remaining < maxCommentLength ? remaining : maxCommentLength;

            // The header is written first; it uses the same scratch buffer, which is
            // overwritten with the comment payload afterwards.
            this.WriteMarkerHeader(JpegConstants.Markers.COM, chunkLength + 2, buffer);

            int start = comment.Value.Length - remaining;
            ReadOnlySpan<char> chunk = comment.Value.Span.Slice(start, chunkLength);

            // Each character is narrowed to a single byte.
            int position = 0;
            foreach (char character in chunk)
            {
                buffer[position++] = (byte)character;
            }

            this.outputStream.Write(buffer, 0, chunkLength);
            remaining -= chunkLength;
        }
    }
}
/// <summary>
/// Writes the Define Huffman Table marker and tables.
/// </summary>

7
src/ImageSharp/Formats/Jpeg/JpegMetadata.cs

@ -15,6 +15,7 @@ public class JpegMetadata : IDeepCloneable
/// </summary>
public JpegMetadata()
{
this.Comments = new List<JpegComData>();
}
/// <summary>
@ -25,6 +26,7 @@ public class JpegMetadata : IDeepCloneable
{
    this.ColorType = other.ColorType;

    // Copy the comments into a fresh list so that the clone and the original do not
    // share one mutable collection; otherwise adding a comment to either would change both.
    this.Comments = new List<JpegComData>(other.Comments);
    this.LuminanceQuality = other.LuminanceQuality;
    this.ChrominanceQuality = other.ChrominanceQuality;
}
@ -101,6 +103,11 @@ public class JpegMetadata : IDeepCloneable
/// </remarks>
public bool? Progressive { get; internal set; }
/// <summary>
/// Gets the comments.
/// </summary>
public IList<JpegComData> Comments { get; }
/// <inheritdoc/>
public IDeepCloneable DeepClone() => new JpegMetadata(this);
}

1
src/ImageSharp/Formats/Jpeg/MetadataExtensions.cs

@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Text;
using SixLabors.ImageSharp.Formats.Jpeg;
using SixLabors.ImageSharp.Metadata;

47
src/ImageSharp/Formats/Png/PngDecoderCore.cs

@ -126,6 +126,11 @@ internal sealed class PngDecoderCore : IImageDecoderInternals
/// </summary>
private readonly Crc32 crc32 = new();
/// <summary>
/// The maximum memory in bytes that a zTXt, sPLT, iTXt, iCCP, or unknown chunk can occupy when decompressed.
/// </summary>
private readonly int maxUncompressedLength;
/// <summary>
/// Initializes a new instance of the <see cref="PngDecoderCore"/> class.
/// </summary>
@ -138,6 +143,7 @@ internal sealed class PngDecoderCore : IImageDecoderInternals
this.skipMetadata = options.GeneralOptions.SkipMetadata;
this.memoryAllocator = this.configuration.MemoryAllocator;
this.pngCrcChunkHandling = options.PngCrcChunkHandling;
this.maxUncompressedLength = options.MaxUncompressedAncillaryChunkSizeBytes;
}
internal PngDecoderCore(PngDecoderOptions options, bool colorMetadataOnly)
@ -149,6 +155,7 @@ internal sealed class PngDecoderCore : IImageDecoderInternals
this.configuration = options.GeneralOptions.Configuration;
this.memoryAllocator = this.configuration.MemoryAllocator;
this.pngCrcChunkHandling = options.PngCrcChunkHandling;
this.maxUncompressedLength = options.MaxUncompressedAncillaryChunkSizeBytes;
}
/// <inheritdoc/>
@ -602,23 +609,7 @@ internal sealed class PngDecoderCore : IImageDecoderInternals
private void InitializeImage<TPixel>(ImageMetadata metadata, FrameControl frameControl, out Image<TPixel> image)
where TPixel : unmanaged, IPixel<TPixel>
{
// When ignoring data CRCs, we can't use the image constructor that leaves the buffer uncleared.
if (this.pngCrcChunkHandling is PngCrcChunkHandling.IgnoreData or PngCrcChunkHandling.IgnoreAll)
{
image = new Image<TPixel>(
this.configuration,
this.header.Width,
this.header.Height,
metadata);
}
else
{
image = Image.CreateUninitialized<TPixel>(
this.configuration,
this.header.Width,
this.header.Height,
metadata);
}
image = new Image<TPixel>(this.configuration, this.header.Width, this.header.Height, metadata);
PngFrameMetadata frameMetadata = image.Frames.RootFrame.Metadata.GetPngMetadata();
frameMetadata.FromChunk(in frameControl);
@ -1575,7 +1566,7 @@ internal sealed class PngDecoderCore : IImageDecoderInternals
ReadOnlySpan<byte> compressedData = data[(zeroIndex + 2)..];
if (this.TryDecompressZlibData(compressedData, out byte[] iccpProfileBytes))
if (this.TryDecompressZlibData(compressedData, this.maxUncompressedLength, out byte[] iccpProfileBytes))
{
metadata.IccProfile = new IccProfile(iccpProfileBytes);
}
@ -1585,9 +1576,10 @@ internal sealed class PngDecoderCore : IImageDecoderInternals
/// Tries to decompress zlib compressed data.
/// </summary>
/// <param name="compressedData">The compressed data.</param>
/// <param name="maxLength">The maximum uncompressed length.</param>
/// <param name="uncompressedBytesArray">The uncompressed bytes array.</param>
/// <returns>True, if de-compressing was successful.</returns>
private unsafe bool TryDecompressZlibData(ReadOnlySpan<byte> compressedData, out byte[] uncompressedBytesArray)
private unsafe bool TryDecompressZlibData(ReadOnlySpan<byte> compressedData, int maxLength, out byte[] uncompressedBytesArray)
{
fixed (byte* compressedDataBase = compressedData)
{
@ -1607,6 +1599,12 @@ internal sealed class PngDecoderCore : IImageDecoderInternals
int bytesRead = inflateStream.CompressedStream.Read(destUncompressedData, 0, destUncompressedData.Length);
while (bytesRead != 0)
{
if (memoryStreamOutput.Length > maxLength)
{
uncompressedBytesArray = Array.Empty<byte>();
return false;
}
memoryStreamOutput.Write(destUncompressedData[..bytesRead]);
bytesRead = inflateStream.CompressedStream.Read(destUncompressedData, 0, destUncompressedData.Length);
}
@ -1749,7 +1747,7 @@ internal sealed class PngDecoderCore : IImageDecoderInternals
/// <returns>The <see cref="bool"/>.</returns>
private bool TryDecompressTextData(ReadOnlySpan<byte> compressedData, Encoding encoding, [NotNullWhen(true)] out string? value)
{
if (this.TryDecompressZlibData(compressedData, out byte[] uncompressedData))
if (this.TryDecompressZlibData(compressedData, this.maxUncompressedLength, out byte[] uncompressedData))
{
value = encoding.GetString(uncompressedData);
return true;
@ -1874,8 +1872,13 @@ internal sealed class PngDecoderCore : IImageDecoderInternals
PngChunkType type = this.ReadChunkType(buffer);
// If we're reading color metadata only we're only interested in the header, palette, transparency, animation control and frame control chunks.
// We can skip all other chunk data in the stream for better performance.
if (this.colorMetadataOnly && type != PngChunkType.Header && type != PngChunkType.Transparency && type != PngChunkType.Palette)
// We can skip most other chunk data in the stream for better performance.
if (this.colorMetadataOnly &&
type != PngChunkType.Header &&
type != PngChunkType.Transparency &&
type != PngChunkType.Palette &&
type != PngChunkType.AnimationControl &&
type != PngChunkType.FrameControl)
{
chunk = new PngChunk(length, type);
return true;

6
src/ImageSharp/Formats/Png/PngDecoderOptions.cs

@ -15,4 +15,10 @@ public sealed class PngDecoderOptions : ISpecializedDecoderOptions
/// Gets a value indicating how to handle validation of any CRC (Cyclic Redundancy Check) data within the encoded PNG.
/// </summary>
public PngCrcChunkHandling PngCrcChunkHandling { get; init; } = PngCrcChunkHandling.IgnoreNonCritical;
/// <summary>
/// Gets the maximum memory in bytes that a zTXt, sPLT, iTXt, iCCP, or unknown chunk can occupy when decompressed.
/// Defaults to 8MB
/// </summary>
public int MaxUncompressedAncillaryChunkSizeBytes { get; init; } = 8 * 1024 * 1024; // 8MB
}

20
src/ImageSharp/Formats/Png/PngEncoderCore.cs

@ -4,6 +4,7 @@
using System.Buffers;
using System.Buffers.Binary;
using System.IO.Hashing;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Common.Helpers;
@ -1559,7 +1560,24 @@ internal sealed class PngEncoderCore : IImageEncoderInternals, IDisposable
{
// We can use the color data from the decoded metadata here.
// We avoid dithering by default to preserve the original colors.
this.derivedTransparencyIndex = metadata.ColorTable.Value.Span.IndexOf(Color.Transparent);
ReadOnlySpan<Color> palette = metadata.ColorTable.Value.Span;
// Certain operations perform alpha premultiplication, which can cause the color to change so we
// must search for the transparency index in the palette.
// Transparent pixels are much more likely to be found at the end of a palette.
int index = -1;
for (int i = palette.Length - 1; i >= 0; i--)
{
Vector4 instance = palette[i].ToScaledVector4();
if (instance.W == 0f)
{
index = i;
break;
}
}
this.derivedTransparencyIndex = index;
this.quantizer = new PaletteQuantizer(metadata.ColorTable.Value, new() { Dither = null }, this.derivedTransparencyIndex);
}
else

26
src/ImageSharp/Formats/Webp/AlphaDecoder.cs

@ -6,7 +6,9 @@ using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.Formats.Webp.BitReader;
using SixLabors.ImageSharp.Formats.Webp.Lossless;
using SixLabors.ImageSharp.Memory;
@ -311,32 +313,28 @@ internal class AlphaDecoder : IDisposable
private static void HorizontalUnfilter(Span<byte> prev, Span<byte> input, Span<byte> dst, int width)
{
if (Sse2.IsSupported)
if ((Sse2.IsSupported || AdvSimd.IsSupported) && width >= 9)
{
dst[0] = (byte)(input[0] + (prev.IsEmpty ? 0 : prev[0]));
if (width <= 1)
{
return;
}
nuint i;
Vector128<int> last = Vector128<int>.Zero.WithElement(0, dst[0]);
ref byte srcRef = ref MemoryMarshal.GetReference(input);
ref byte dstRef = ref MemoryMarshal.GetReference(dst);
for (i = 1; i <= (uint)width - 8; i += 8)
{
Vector128<long> a0 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref srcRef, i)), 0);
Vector128<byte> a1 = Sse2.Add(a0.AsByte(), last.AsByte());
Vector128<byte> a2 = Sse2.ShiftLeftLogical128BitLane(a1, 1);
Vector128<byte> a3 = Sse2.Add(a1, a2);
Vector128<byte> a4 = Sse2.ShiftLeftLogical128BitLane(a3, 2);
Vector128<byte> a5 = Sse2.Add(a3, a4);
Vector128<byte> a6 = Sse2.ShiftLeftLogical128BitLane(a5, 4);
Vector128<byte> a7 = Sse2.Add(a5, a6);
Vector128<byte> a1 = a0.AsByte() + last.AsByte();
Vector128<byte> a2 = Vector128Utilities.ShiftLeftBytesInVector(a1, 1);
Vector128<byte> a3 = a1 + a2;
Vector128<byte> a4 = Vector128Utilities.ShiftLeftBytesInVector(a3, 2);
Vector128<byte> a5 = a3 + a4;
Vector128<byte> a6 = Vector128Utilities.ShiftLeftBytesInVector(a5, 4);
Vector128<byte> a7 = a5 + a6;
ref byte outputRef = ref Unsafe.Add(ref dstRef, i);
Unsafe.As<byte, Vector64<byte>>(ref outputRef) = a7.GetLower();
last = Sse2.ShiftRightLogical(a7.AsInt64(), 56).AsInt32();
last = Vector128.ShiftRightLogical(a7.AsInt64(), 56).AsInt32();
}
for (; i < (uint)width; ++i)

16
src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs

@ -20,15 +20,15 @@ public partial struct Rgba32
/// <inheritdoc />
public override void ToVector4(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
ReadOnlySpan<Rgba32> source,
Span<Vector4> destinationVectors,
PixelConversionModifiers modifiers)
{
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationVectors, nameof(destinationVectors));
Guard.DestinationShouldNotBeTooShort(source, destinationVectors, nameof(destinationVectors));
destinationVectors = destinationVectors[..sourcePixels.Length];
destinationVectors = destinationVectors[..source.Length];
SimdUtils.ByteToNormalizedFloat(
MemoryMarshal.Cast<Rgba32, byte>(sourcePixels),
MemoryMarshal.Cast<Rgba32, byte>(source),
MemoryMarshal.Cast<Vector4, float>(destinationVectors));
Vector4Converters.ApplyForwardConversionModifiers(destinationVectors, modifiers);
}
@ -37,16 +37,16 @@ public partial struct Rgba32
public override void FromVector4Destructive(
Configuration configuration,
Span<Vector4> sourceVectors,
Span<Rgba32> destinationPixels,
Span<Rgba32> destination,
PixelConversionModifiers modifiers)
{
Guard.DestinationShouldNotBeTooShort(sourceVectors, destinationPixels, nameof(destinationPixels));
Guard.DestinationShouldNotBeTooShort(sourceVectors, destination, nameof(destination));
destinationPixels = destinationPixels[..sourceVectors.Length];
destination = destination[..sourceVectors.Length];
Vector4Converters.ApplyBackwardConversionModifiers(sourceVectors, modifiers);
SimdUtils.NormalizedFloatToByteSaturate(
MemoryMarshal.Cast<Vector4, float>(sourceVectors),
MemoryMarshal.Cast<Rgba32, byte>(destinationPixels));
MemoryMarshal.Cast<Rgba32, byte>(destination));
}
/// <inheritdoc />

75
src/ImageSharp/PixelFormats/Utils/Vector4Converters.RgbaCompatible.cs

@ -5,6 +5,7 @@ using System.Buffers;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
namespace SixLabors.ImageSharp.PixelFormats.Utils;
@ -31,74 +32,86 @@ internal static partial class Vector4Converters
/// Provides an efficient default implementation for <see cref="PixelOperations{TPixel}.ToVector4(Configuration,ReadOnlySpan{TPixel},Span{Vector4},PixelConversionModifiers)"/>
/// The method works by internally converting to a <see cref="Rgba32"/> therefore it's not applicable for that type!
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
/// <typeparam name="TPixel">The type of pixel format.</typeparam>
/// <param name="configuration">The configuration.</param>
/// <param name="pixelOperations">The pixel operations instance.</param>
/// <param name="source">The source buffer.</param>
/// <param name="destination">The destination buffer.</param>
/// <param name="modifiers">The conversion modifier flags.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void ToVector4<TPixel>(
Configuration configuration,
PixelOperations<TPixel> pixelOperations,
ReadOnlySpan<TPixel> sourcePixels,
Span<Vector4> destVectors,
ReadOnlySpan<TPixel> source,
Span<Vector4> destination,
PixelConversionModifiers modifiers)
where TPixel : unmanaged, IPixel<TPixel>
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destVectors, nameof(destVectors));
Guard.DestinationShouldNotBeTooShort(source, destination, nameof(destination));
int count = sourcePixels.Length;
int count = source.Length;
// Not worth for small buffers:
if (count < Vector4ConversionThreshold)
{
Default.UnsafeToVector4(sourcePixels, destVectors, modifiers);
Default.UnsafeToVector4(source, destination, modifiers);
return;
}
// Using the last quarter of 'destVectors' as a temporary buffer to avoid allocation:
// Using the last quarter of 'destination' as a temporary buffer to avoid allocation:
int countWithoutLastItem = count - 1;
ReadOnlySpan<TPixel> reducedSource = sourcePixels[..countWithoutLastItem];
Span<Rgba32> lastQuarterOfDestBuffer = MemoryMarshal.Cast<Vector4, Rgba32>(destVectors).Slice((3 * count) + 1, countWithoutLastItem);
pixelOperations.ToRgba32(configuration, reducedSource, lastQuarterOfDestBuffer);
ReadOnlySpan<TPixel> reducedSource = source[..countWithoutLastItem];
Span<Rgba32> lastQuarterOfDestination = MemoryMarshal.Cast<Vector4, Rgba32>(destination).Slice((3 * count) + 1, countWithoutLastItem);
pixelOperations.ToRgba32(configuration, reducedSource, lastQuarterOfDestination);
// 'destVectors' and 'lastQuarterOfDestBuffer' are overlapping buffers,
// 'destination' and 'lastQuarterOfDestination' are overlapping buffers,
// but we are always reading/writing at different positions:
SimdUtils.ByteToNormalizedFloat(
MemoryMarshal.Cast<Rgba32, byte>(lastQuarterOfDestBuffer),
MemoryMarshal.Cast<Vector4, float>(destVectors[..countWithoutLastItem]));
MemoryMarshal.Cast<Rgba32, byte>(lastQuarterOfDestination),
MemoryMarshal.Cast<Vector4, float>(destination[..countWithoutLastItem]));
destVectors[countWithoutLastItem] = sourcePixels[countWithoutLastItem].ToVector4();
destination[countWithoutLastItem] = source[countWithoutLastItem].ToVector4();
// TODO: Investigate optimized 1-pass approach!
ApplyForwardConversionModifiers(destVectors, modifiers);
ApplyForwardConversionModifiers(destination, modifiers);
}
/// <summary>
/// Provides an efficient default implementation for <see cref="PixelOperations{TPixel}.FromVector4Destructive(Configuration,Span{Vector4},Span{TPixel},PixelConversionModifiers)"/>
/// The method works by internally converting to a <see cref="Rgba32"/> therefore it's not applicable for that type!
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
/// <typeparam name="TPixel">The type of pixel format.</typeparam>
/// <param name="configuration">The configuration.</param>
/// <param name="pixelOperations">The pixel operations instance.</param>
/// <param name="source">The source buffer.</param>
/// <param name="destination">The destination buffer.</param>
/// <param name="modifiers">The conversion modifier flags.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void FromVector4<TPixel>(
Configuration configuration,
PixelOperations<TPixel> pixelOperations,
Span<Vector4> sourceVectors,
Span<TPixel> destPixels,
Span<Vector4> source,
Span<TPixel> destination,
PixelConversionModifiers modifiers)
where TPixel : unmanaged, IPixel<TPixel>
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourceVectors, destPixels, nameof(destPixels));
Guard.DestinationShouldNotBeTooShort(source, destination, nameof(destination));
int count = sourceVectors.Length;
int count = source.Length;
// Not worth for small buffers:
if (count < Vector4ConversionThreshold)
{
Default.UnsafeFromVector4(sourceVectors, destPixels, modifiers);
Default.UnsafeFromVector4(source, destination, modifiers);
return;
}
// TODO: Investigate optimized 1-pass approach!
ApplyBackwardConversionModifiers(sourceVectors, modifiers);
ApplyBackwardConversionModifiers(source, modifiers);
// For the opposite direction it's not easy to implement the trick used in RunRgba32CompatibleToVector4Conversion,
// so let's allocate a temporary buffer as usually:
@ -106,20 +119,30 @@ internal static partial class Vector4Converters
Span<Rgba32> tempSpan = tempBuffer.Memory.Span;
SimdUtils.NormalizedFloatToByteSaturate(
MemoryMarshal.Cast<Vector4, float>(sourceVectors),
MemoryMarshal.Cast<Vector4, float>(source),
MemoryMarshal.Cast<Rgba32, byte>(tempSpan));
pixelOperations.FromRgba32(configuration, tempSpan, destPixels);
pixelOperations.FromRgba32(configuration, tempSpan, destination);
}
private static int CalculateVector4ConversionThreshold()
{
if (!Vector.IsHardwareAccelerated)
if (!Vector128.IsHardwareAccelerated)
{
return int.MaxValue;
}
return SimdUtils.ExtendedIntrinsics.IsAvailable && SimdUtils.HasVector8 ? 256 : 128;
if (Vector512.IsHardwareAccelerated)
{
return 512;
}
if (Vector256.IsHardwareAccelerated)
{
return 256;
}
return 128;
}
}
}

4
src/ImageSharp/Processing/Processors/Drawing/DrawImageProcessor{TPixelBg,TPixelFg}.cs

@ -98,9 +98,10 @@ internal class DrawImageProcessor<TPixelBg, TPixelFg> : ImageProcessor<TPixelBg>
top = 0;
}
// clamp the height/width to the availible space left to prevent overflowing
// Clamp the height/width to the available space left to prevent overflowing
foregroundRectangle.Width = Math.Min(source.Width - left, foregroundRectangle.Width);
foregroundRectangle.Height = Math.Min(source.Height - top, foregroundRectangle.Height);
foregroundRectangle = Rectangle.Intersect(foregroundRectangle, this.ForegroundImage.Bounds);
int width = foregroundRectangle.Width;
int height = foregroundRectangle.Height;
@ -111,7 +112,6 @@ internal class DrawImageProcessor<TPixelBg, TPixelFg> : ImageProcessor<TPixelBg>
}
// Sanitize the dimensions so that we don't try and sample outside the image.
foregroundRectangle = Rectangle.Intersect(foregroundRectangle, this.ForegroundImage.Bounds);
Rectangle backgroundRectangle = Rectangle.Intersect(new(left, top, width, height), this.SourceRectangle);
Configuration configuration = this.Configuration;

102
tests/ImageSharp.Benchmarks/Bulk/FromVector4.cs

@ -18,9 +18,9 @@ namespace SixLabors.ImageSharp.Benchmarks.Bulk;
public abstract class FromVector4<TPixel>
where TPixel : unmanaged, IPixel<TPixel>
{
protected IMemoryOwner<Vector4> source;
protected IMemoryOwner<Vector4> Source { get; set; }
protected IMemoryOwner<TPixel> destination;
protected IMemoryOwner<TPixel> Destination { get; set; }
protected Configuration Configuration => Configuration.Default;
@ -31,22 +31,22 @@ public abstract class FromVector4<TPixel>
[GlobalSetup]
public void Setup()
{
this.destination = this.Configuration.MemoryAllocator.Allocate<TPixel>(this.Count);
this.source = this.Configuration.MemoryAllocator.Allocate<Vector4>(this.Count);
this.Destination = this.Configuration.MemoryAllocator.Allocate<TPixel>(this.Count);
this.Source = this.Configuration.MemoryAllocator.Allocate<Vector4>(this.Count);
}
[GlobalCleanup]
public void Cleanup()
{
this.destination.Dispose();
this.source.Dispose();
this.Destination.Dispose();
this.Source.Dispose();
}
// [Benchmark]
public void PerElement()
{
ref Vector4 s = ref MemoryMarshal.GetReference(this.source.GetSpan());
ref TPixel d = ref MemoryMarshal.GetReference(this.destination.GetSpan());
ref Vector4 s = ref MemoryMarshal.GetReference(this.Source.GetSpan());
ref TPixel d = ref MemoryMarshal.GetReference(this.Destination.GetSpan());
for (nuint i = 0; i < (uint)this.Count; i++)
{
Unsafe.Add(ref d, i) = TPixel.FromVector4(Unsafe.Add(ref s, i));
@ -55,38 +55,20 @@ public abstract class FromVector4<TPixel>
[Benchmark(Baseline = true)]
public void PixelOperations_Base()
=> new PixelOperations<TPixel>().FromVector4Destructive(this.Configuration, this.source.GetSpan(), this.destination.GetSpan());
=> new PixelOperations<TPixel>().FromVector4Destructive(this.Configuration, this.Source.GetSpan(), this.Destination.GetSpan());
[Benchmark]
public void PixelOperations_Specialized()
=> PixelOperations<TPixel>.Instance.FromVector4Destructive(this.Configuration, this.source.GetSpan(), this.destination.GetSpan());
=> PixelOperations<TPixel>.Instance.FromVector4Destructive(this.Configuration, this.Source.GetSpan(), this.Destination.GetSpan());
}
public class FromVector4Rgba32 : FromVector4<Rgba32>
{
[Benchmark]
public void FallbackIntrinsics128()
{
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
SimdUtils.FallbackIntrinsics128.NormalizedFloatToByteSaturate(sBytes, dFloats);
}
[Benchmark]
public void ExtendedIntrinsic()
{
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
SimdUtils.ExtendedIntrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats);
}
[Benchmark]
public void UseHwIntrinsics()
{
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.Source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.Destination.GetSpan());
SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats);
}
@ -96,8 +78,8 @@ public class FromVector4Rgba32 : FromVector4<Rgba32>
[Benchmark]
public void UseAvx2_Grouped()
{
Span<float> src = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
Span<float> src = MemoryMarshal.Cast<Vector4, float>(this.Source.GetSpan());
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(this.Destination.GetSpan());
nuint n = (uint)dest.Length / (uint)Vector<byte>.Count;
@ -107,7 +89,7 @@ public class FromVector4Rgba32 : FromVector4<Rgba32>
ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
Vector256<int> mask = Unsafe.As<byte, Vector256<int>>(ref maskBase);
var maxBytes = Vector256.Create(255f);
Vector256<float> maxBytes = Vector256.Create(255f);
for (nuint i = 0; i < n; i++)
{
@ -137,25 +119,37 @@ public class FromVector4Rgba32 : FromVector4<Rgba32>
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<int> ConvertToInt32(Vector256<float> vf, Vector256<float> scale)
{
vf = Avx.Multiply(scale, vf);
return Avx.ConvertToVector256Int32(vf);
}
// *** RESULTS 2020 March: ***
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.200-preview-014971
// Job-IUZXZT : .NET Core 3.1.2 (CoreCLR 4.700.20.6602, CoreFX 4.700.20.6702), X64 RyuJIT
//
// | Method | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------------------- |------ |-----------:|------------:|----------:|------:|--------:|------:|------:|------:|----------:|
// | FallbackIntrinsics128 | 1024 | 2,952.6 ns | 1,680.77 ns | 92.13 ns | 3.32 | 0.16 | - | - | - | - |
// | BasicIntrinsics256 | 1024 | 1,664.5 ns | 928.11 ns | 50.87 ns | 1.87 | 0.09 | - | - | - | - |
// | ExtendedIntrinsic | 1024 | 890.6 ns | 375.48 ns | 20.58 ns | 1.00 | 0.00 | - | - | - | - |
// | UseAvx2 | 1024 | 299.0 ns | 30.47 ns | 1.67 ns | 0.34 | 0.01 | - | - | - | - |
// | UseAvx2_Grouped | 1024 | 318.1 ns | 48.19 ns | 2.64 ns | 0.36 | 0.01 | - | - | - | - |
// | PixelOperations_Base | 1024 | 8,136.9 ns | 1,834.82 ns | 100.57 ns | 9.14 | 0.26 | - | - | - | 24 B |
// | PixelOperations_Specialized | 1024 | 951.1 ns | 123.93 ns | 6.79 ns | 1.07 | 0.03 | - | - | - | - |
/*
BenchmarkDotNet v0.13.10, Windows 11 (10.0.22631.3085/23H2/2023Update/SunValley3)
11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
.NET SDK 8.0.200-preview.23624.5
[Host] : .NET 8.0.1 (8.0.123.58001), X64 RyuJIT AVX2
Job-YJYLLR : .NET 8.0.1 (8.0.123.58001), X64 RyuJIT AVX2
Runtime=.NET 8.0 Arguments=/p:DebugType=portable IterationCount=3
LaunchCount=1 WarmupCount=3
| Method | Count | Mean | Error | StdDev | Ratio | RatioSD | Allocated | Alloc Ratio |
|---------------------------- |------ |------------:|-------------:|-----------:|------:|--------:|----------:|------------:|
| PixelOperations_Base | 64 | 114.80 ns | 16.459 ns | 0.902 ns | 1.00 | 0.00 | - | NA |
| PixelOperations_Specialized | 64 | 28.91 ns | 80.482 ns | 4.411 ns | 0.25 | 0.04 | - | NA |
| FallbackIntrinsics128 | 64 | 133.60 ns | 23.750 ns | 1.302 ns | 1.16 | 0.02 | - | NA |
| ExtendedIntrinsic | 64 | 40.11 ns | 10.183 ns | 0.558 ns | 0.35 | 0.01 | - | NA |
| UseHwIntrinsics | 64 | 14.71 ns | 4.860 ns | 0.266 ns | 0.13 | 0.00 | - | NA |
| UseAvx2_Grouped | 64 | 20.23 ns | 11.619 ns | 0.637 ns | 0.18 | 0.00 | - | NA |
| | | | | | | | | |
| PixelOperations_Base | 256 | 387.94 ns | 31.591 ns | 1.732 ns | 1.00 | 0.00 | - | NA |
| PixelOperations_Specialized | 256 | 50.93 ns | 22.388 ns | 1.227 ns | 0.13 | 0.00 | - | NA |
| FallbackIntrinsics128 | 256 | 509.72 ns | 249.926 ns | 13.699 ns | 1.31 | 0.04 | - | NA |
| ExtendedIntrinsic | 256 | 140.32 ns | 9.353 ns | 0.513 ns | 0.36 | 0.00 | - | NA |
| UseHwIntrinsics | 256 | 41.99 ns | 16.000 ns | 0.877 ns | 0.11 | 0.00 | - | NA |
| UseAvx2_Grouped | 256 | 63.81 ns | 2.360 ns | 0.129 ns | 0.16 | 0.00 | - | NA |
| | | | | | | | | |
| PixelOperations_Base | 2048 | 2,979.49 ns | 2,023.706 ns | 110.926 ns | 1.00 | 0.00 | - | NA |
| PixelOperations_Specialized | 2048 | 326.19 ns | 19.077 ns | 1.046 ns | 0.11 | 0.00 | - | NA |
| FallbackIntrinsics128 | 2048 | 3,885.95 ns | 411.078 ns | 22.533 ns | 1.31 | 0.05 | - | NA |
| ExtendedIntrinsic | 2048 | 1,078.58 ns | 136.960 ns | 7.507 ns | 0.36 | 0.01 | - | NA |
| UseHwIntrinsics | 2048 | 312.07 ns | 68.662 ns | 3.764 ns | 0.10 | 0.00 | - | NA |
| UseAvx2_Grouped | 2048 | 451.83 ns | 41.742 ns | 2.288 ns | 0.15 | 0.01 | - | NA |
*/
}

66
tests/ImageSharp.Benchmarks/Bulk/FromVector4_Rgb24.cs

@ -7,48 +7,26 @@ using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Benchmarks.Bulk;
[Config(typeof(Config.Short))]
public class FromVector4_Rgb24 : FromVector4<Rgb24>
{
}
public class FromVector4_Rgb24 : FromVector4<Rgb24>;
// 2020-11-02
// ##########
//
// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT
// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT
// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// IterationCount=3 LaunchCount=1 WarmupCount=3
//
// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:|
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 64 | 343.2 ns | 305.91 ns | 16.77 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 64 | 320.8 ns | 19.93 ns | 1.09 ns | 0.94 | 0.05 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 64 | 234.3 ns | 17.98 ns | 0.99 ns | 1.00 | 0.00 | 0.0052 | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 64 | 246.0 ns | 82.34 ns | 4.51 ns | 1.05 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 64 | 222.3 ns | 39.46 ns | 2.16 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 64 | 243.4 ns | 33.58 ns | 1.84 ns | 1.09 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 256 | 824.9 ns | 32.77 ns | 1.80 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 256 | 967.0 ns | 39.09 ns | 2.14 ns | 1.17 | 0.01 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 256 | 756.9 ns | 94.43 ns | 5.18 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 256 | 1,003.3 ns | 3,192.09 ns | 174.97 ns | 1.32 | 0.22 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 256 | 748.6 ns | 248.03 ns | 13.60 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 256 | 437.0 ns | 36.48 ns | 2.00 ns | 0.58 | 0.01 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 2048 | 5,751.6 ns | 704.24 ns | 38.60 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 2048 | 4,391.6 ns | 718.17 ns | 39.37 ns | 0.76 | 0.00 | 0.0153 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 2048 | 6,202.0 ns | 1,815.18 ns | 99.50 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 2048 | 4,225.6 ns | 1,004.03 ns | 55.03 ns | 0.68 | 0.01 | 0.0153 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 2048 | 6,157.1 ns | 2,516.98 ns | 137.96 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 2048 | 1,822.7 ns | 1,764.43 ns | 96.71 ns | 0.30 | 0.02 | 0.0172 | - | - | 72 B |
/*
BenchmarkDotNet v0.13.10, Windows 11 (10.0.22631.3085/23H2/2023Update/SunValley3)
11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
.NET SDK 8.0.200-preview.23624.5
[Host] : .NET 8.0.1 (8.0.123.58001), X64 RyuJIT AVX2
Job-NEHCEM : .NET 8.0.1 (8.0.123.58001), X64 RyuJIT AVX2
Runtime=.NET 8.0 Arguments=/p:DebugType=portable IterationCount=3
LaunchCount=1 WarmupCount=3
| Method | Count | Mean | Error | StdDev | Ratio | Gen0 | Allocated | Alloc Ratio |
|---------------------------- |------ |------------:|----------:|---------:|------:|-------:|----------:|------------:|
| PixelOperations_Base | 64 | 95.87 ns | 13.60 ns | 0.745 ns | 1.00 | - | - | NA |
| PixelOperations_Specialized | 64 | 97.34 ns | 30.34 ns | 1.663 ns | 1.02 | - | - | NA |
| | | | | | | | | |
| PixelOperations_Base | 256 | 337.80 ns | 88.10 ns | 4.829 ns | 1.00 | - | - | NA |
| PixelOperations_Specialized | 256 | 195.07 ns | 30.54 ns | 1.674 ns | 0.58 | 0.0153 | 96 B | NA |
| | | | | | | | | |
| PixelOperations_Base | 2048 | 2,561.79 ns | 162.45 ns | 8.905 ns | 1.00 | - | - | NA |
| PixelOperations_Specialized | 2048 | 741.85 ns | 18.05 ns | 0.989 ns | 0.29 | 0.0153 | 96 B | NA |
*/

25
tests/ImageSharp.Benchmarks/Bulk/ToVector4.cs

@ -14,9 +14,9 @@ namespace SixLabors.ImageSharp.Benchmarks.Bulk;
public abstract class ToVector4<TPixel>
where TPixel : unmanaged, IPixel<TPixel>
{
protected IMemoryOwner<TPixel> source;
protected IMemoryOwner<TPixel> Source { get; set; }
protected IMemoryOwner<Vector4> destination;
protected IMemoryOwner<Vector4> Destination { get; set; }
protected Configuration Configuration => Configuration.Default;
@ -26,22 +26,22 @@ public abstract class ToVector4<TPixel>
[GlobalSetup]
public void Setup()
{
this.source = this.Configuration.MemoryAllocator.Allocate<TPixel>(this.Count);
this.destination = this.Configuration.MemoryAllocator.Allocate<Vector4>(this.Count);
this.Source = this.Configuration.MemoryAllocator.Allocate<TPixel>(this.Count);
this.Destination = this.Configuration.MemoryAllocator.Allocate<Vector4>(this.Count);
}
[GlobalCleanup]
public void Cleanup()
{
this.source.Dispose();
this.destination.Dispose();
this.Source.Dispose();
this.Destination.Dispose();
}
// [Benchmark]
public void Naive()
{
Span<TPixel> s = this.source.GetSpan();
Span<Vector4> d = this.destination.GetSpan();
Span<TPixel> s = this.Source.GetSpan();
Span<Vector4> d = this.Destination.GetSpan();
for (int i = 0; i < this.Count; i++)
{
@ -50,11 +50,8 @@ public abstract class ToVector4<TPixel>
}
[Benchmark]
public void PixelOperations_Specialized()
{
PixelOperations<TPixel>.Instance.ToVector4(
public void PixelOperations_Specialized() => PixelOperations<TPixel>.Instance.ToVector4(
this.Configuration,
this.source.GetSpan(),
this.destination.GetSpan());
}
this.Source.GetSpan(),
this.Destination.GetSpan());
}

4
tests/ImageSharp.Benchmarks/Bulk/ToVector4_Bgra32.cs

@ -16,8 +16,8 @@ public class ToVector4_Bgra32 : ToVector4<Bgra32>
{
new PixelOperations<Bgra32>().ToVector4(
this.Configuration,
this.source.GetSpan(),
this.destination.GetSpan());
this.Source.GetSpan(),
this.Destination.GetSpan());
}
// RESULTS:

4
tests/ImageSharp.Benchmarks/Bulk/ToVector4_Rgb24.cs

@ -16,8 +16,8 @@ public class ToVector4_Rgb24 : ToVector4<Rgb24>
{
new PixelOperations<Rgb24>().ToVector4(
this.Configuration,
this.source.GetSpan(),
this.destination.GetSpan());
this.Source.GetSpan(),
this.Destination.GetSpan());
}
}

72
tests/ImageSharp.Benchmarks/Bulk/ToVector4_Rgba32.cs

@ -14,36 +14,18 @@ namespace SixLabors.ImageSharp.Benchmarks.Bulk;
[Config(typeof(Config.Short))]
public class ToVector4_Rgba32 : ToVector4<Rgba32>
{
[Benchmark]
public void FallbackIntrinsics128()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
SimdUtils.FallbackIntrinsics128.ByteToNormalizedFloat(sBytes, dFloats);
}
[Benchmark]
public void PixelOperations_Base()
=> new PixelOperations<Rgba32>().ToVector4(
this.Configuration,
this.source.GetSpan(),
this.destination.GetSpan());
[Benchmark]
public void ExtendedIntrinsics()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(sBytes, dFloats);
}
this.Source.GetSpan(),
this.Destination.GetSpan());
[Benchmark]
public void HwIntrinsics()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.Source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.Destination.GetSpan());
SimdUtils.HwIntrinsics.ByteToNormalizedFloat(sBytes, dFloats);
}
@ -51,8 +33,8 @@ public class ToVector4_Rgba32 : ToVector4<Rgba32>
// [Benchmark]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.Source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.Destination.GetSpan());
nuint n = (uint)dFloats.Length / (uint)Vector<byte>.Count;
@ -76,14 +58,14 @@ public class ToVector4_Rgba32 : ToVector4<Rgba32>
}
n = (uint)(dFloats.Length / Vector<float>.Count);
var scale = new Vector<float>(1f / 255f);
Vector<float> scale = new(1f / 255f);
for (nuint i = 0; i < n; i++)
{
ref Vector<float> dRef = ref Unsafe.Add(ref destBase, i);
var du = Vector.AsVectorInt32(dRef);
var v = Vector.ConvertToSingle(du);
Vector<int> du = Vector.AsVectorInt32(dRef);
Vector<float> v = Vector.ConvertToSingle(du);
v *= scale;
dRef = v;
@ -93,14 +75,14 @@ public class ToVector4_Rgba32 : ToVector4<Rgba32>
// [Benchmark]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_ConvertInSameLoop()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.Source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.Destination.GetSpan());
nuint n = (uint)dFloats.Length / (uint)Vector<byte>.Count;
ref Vector<byte> sourceBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference((ReadOnlySpan<byte>)sBytes));
ref Vector<float> destBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dFloats));
var scale = new Vector<float>(1f / 255f);
Vector<float> scale = new(1f / 255f);
for (nuint i = 0; i < n; i++)
{
@ -126,8 +108,8 @@ public class ToVector4_Rgba32 : ToVector4<Rgba32>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> ConvertToNormalizedSingle(Vector<uint> u, Vector<float> scale)
{
var vi = Vector.AsVectorInt32(u);
var v = Vector.ConvertToSingle(vi);
Vector<int> vi = Vector.AsVectorInt32(u);
Vector<float> v = Vector.ConvertToSingle(vi);
v *= scale;
return v;
}
@ -160,4 +142,30 @@ public class ToVector4_Rgba32 : ToVector4<Rgba32>
PixelOperations_Base | Core | 2048 | 6,752.68 ns | 272.820 ns | 15.4148 ns | 1.67 | 0.02 | - | 24 B |
PixelOperations_Specialized | Core | 2048 | 1,126.13 ns | 79.192 ns | 4.4745 ns |!! 0.28 | 0.00 | - | 0 B | <--- ExtendedIntrinsics rock!
*/
/*
BenchmarkDotNet v0.13.10, Windows 11 (10.0.22631.3085/23H2/2023Update/SunValley3)
11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
.NET SDK 8.0.200-preview.23624.5
[Host] : .NET 8.0.1 (8.0.123.58001), X64 RyuJIT AVX2
Job-DFEQJT : .NET 8.0.1 (8.0.123.58001), X64 RyuJIT AVX2
Runtime=.NET 8.0 Arguments=/p:DebugType=portable IterationCount=3
LaunchCount=1 WarmupCount=3
| Method | Count | Mean | Error | StdDev | Allocated |
|---------------------------- |------ |------------:|-----------:|----------:|----------:|
| FallbackIntrinsics128 | 64 | 139.66 ns | 27.429 ns | 1.503 ns | - |
| PixelOperations_Base | 64 | 124.65 ns | 29.653 ns | 1.625 ns | - |
| HwIntrinsics | 64 | 18.16 ns | 4.731 ns | 0.259 ns | - |
| PixelOperations_Specialized | 64 | 27.94 ns | 15.220 ns | 0.834 ns | - |
| FallbackIntrinsics128 | 256 | 525.07 ns | 34.397 ns | 1.885 ns | - |
| PixelOperations_Base | 256 | 464.17 ns | 46.897 ns | 2.571 ns | - |
| HwIntrinsics | 256 | 43.88 ns | 4.525 ns | 0.248 ns | - |
| PixelOperations_Specialized | 256 | 55.57 ns | 14.587 ns | 0.800 ns | - |
| FallbackIntrinsics128 | 2048 | 4,148.44 ns | 476.583 ns | 26.123 ns | - |
| PixelOperations_Base | 2048 | 3,608.42 ns | 66.293 ns | 3.634 ns | - |
| HwIntrinsics | 2048 | 361.42 ns | 35.576 ns | 1.950 ns | - |
| PixelOperations_Specialized | 2048 | 374.82 ns | 33.371 ns | 1.829 ns | - |
*/
}

2
tests/ImageSharp.Benchmarks/LoadResizeSave/README.md

@ -1,4 +1,4 @@
The benchmarks have been adapted from the
The benchmarks have been adapted from the
[PhotoSauce's MemoryStress project](https://github.com/saucecontrol/core-imaging-playground/tree/beeees/MemoryStress).
### Setup

2
tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs

@ -105,7 +105,7 @@ public partial class ColorTests
public void Vector4Constructor()
{
// Act:
Color color = (Color)Vector4.One;
Color color = Color.FromScaledVector(Vector4.One);
// Assert:
Assert.Equal(new RgbaVector(1, 1, 1, 1), color.ToPixel<RgbaVector>());

59
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -3,6 +3,7 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Tests.TestUtilities;
@ -112,26 +113,15 @@ public partial class SimdUtilsTests
public static readonly TheoryData<int> ArraySizesDivisibleBy4 = new() { 0, 4, 8, 28, 1020 };
public static readonly TheoryData<int> ArraySizesDivisibleBy3 = new() { 0, 3, 9, 36, 957 };
public static readonly TheoryData<int> ArraySizesDivisibleBy32 = new() { 0, 32, 512 };
public static readonly TheoryData<int> ArraySizesDivisibleBy64 = new() { 0, 64, 512 };
public static readonly TheoryData<int> ArbitraryArraySizes = new() { 0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520 };
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void FallbackIntrinsics128_BulkConvertByteToNormalizedFloat(int count) => TestImpl_BulkConvertByteToNormalizedFloat(
count,
(s, d) => SimdUtils.FallbackIntrinsics128.ByteToNormalizedFloat(s.Span, d.Span));
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy32))]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat(int count) => TestImpl_BulkConvertByteToNormalizedFloat(
count,
(s, d) => SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(s.Span, d.Span));
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy32))]
[MemberData(nameof(ArraySizesDivisibleBy64))]
public void HwIntrinsics_BulkConvertByteToNormalizedFloat(int count)
{
if (!Sse2.IsSupported)
if (!Sse2.IsSupported && !AdvSimd.IsSupported)
{
return;
}
@ -143,7 +133,7 @@ public partial class SimdUtilsTests
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41);
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX512F | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41);
}
[Theory]
@ -166,43 +156,10 @@ public partial class SimdUtilsTests
}
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void FallbackIntrinsics128_BulkConvertNormalizedFloatToByteClampOverflows(int count) => TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
count,
(s, d) => SimdUtils.FallbackIntrinsics128.NormalizedFloatToByteSaturate(s.Span, d.Span));
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy32))]
public void ExtendedIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count) => TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
count,
(s, d) => SimdUtils.ExtendedIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span));
[Theory]
[InlineData(1234)]
public void ExtendedIntrinsics_ConvertToSingle(short scale)
{
int n = Vector<float>.Count;
short[] sData = new Random(scale).GenerateRandomInt16Array(2 * n, (short)-scale, scale);
float[] fData = sData.Select(u => (float)u).ToArray();
Vector<short> source = new(sData);
Vector<float> expected1 = new(fData, 0);
Vector<float> expected2 = new(fData, n);
// Act:
SimdUtils.ExtendedIntrinsics.ConvertToSingle(source, out Vector<float> actual1, out Vector<float> actual2);
// Assert:
Assert.Equal(expected1, actual1);
Assert.Equal(expected2, actual2);
}
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy32))]
[MemberData(nameof(ArraySizesDivisibleBy64))]
public void HwIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count)
{
if (!Sse2.IsSupported)
if (!Sse2.IsSupported && !AdvSimd.IsSupported)
{
return;
}
@ -214,7 +171,7 @@ public partial class SimdUtilsTests
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX512BW | HwIntrinsics.DisableAVX2);
}
[Theory]

15
tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Metadata.cs

@ -425,6 +425,21 @@ public partial class JpegDecoderTests
VerifyEncodedStrings(exif);
}
[Theory]
[WithFile(TestImages.Jpeg.Issues.Issue2067_CommentMarker, PixelTypes.Rgba32)]
public void JpegDecoder_DecodeMetadataComment<TPixel>(TestImageProvider<TPixel> provider)
    where TPixel : unmanaged, IPixel<TPixel>
{
    // The test image is expected to carry exactly one comment with this text.
    string expectedComment = "TEST COMMENT";

    using Image<TPixel> image = provider.GetImage(JpegDecoder.Instance);
    JpegMetadata metadata = image.Metadata.GetJpegMetadata();

    // Assert.Single checks the count and hands back the only element, so a wrong
    // count fails with a descriptive message instead of silently reading a default value.
    JpegComData comment = Assert.Single(metadata.Comments);
    Assert.Equal(expectedComment, comment.ToString());

    // Decoding the comment must not disturb the pixel data.
    image.DebugSave(provider);
    image.CompareToOriginal(provider);
}
private static void VerifyEncodedStrings(ExifProfile exif)
{
Assert.NotNull(exif);

153
tests/ImageSharp.Tests/Formats/Jpg/JpegEncoderTests.Metadata.cs

@ -32,19 +32,19 @@ public partial class JpegEncoderTests
public void Encode_PreservesIptcProfile()
{
// arrange
using var input = new Image<Rgba32>(1, 1);
var expectedProfile = new IptcProfile();
using Image<Rgba32> input = new(1, 1);
IptcProfile expectedProfile = new();
expectedProfile.SetValue(IptcTag.Country, "ESPAÑA");
expectedProfile.SetValue(IptcTag.City, "unit-test-city");
input.Metadata.IptcProfile = expectedProfile;
// act
using var memStream = new MemoryStream();
using MemoryStream memStream = new();
input.Save(memStream, JpegEncoder);
// assert
memStream.Position = 0;
using var output = Image.Load<Rgba32>(memStream);
using Image<Rgba32> output = Image.Load<Rgba32>(memStream);
IptcProfile actual = output.Metadata.IptcProfile;
Assert.NotNull(actual);
IEnumerable<IptcValue> values = expectedProfile.Values;
@ -55,17 +55,17 @@ public partial class JpegEncoderTests
public void Encode_PreservesExifProfile()
{
// arrange
using var input = new Image<Rgba32>(1, 1);
using Image<Rgba32> input = new(1, 1);
input.Metadata.ExifProfile = new ExifProfile();
input.Metadata.ExifProfile.SetValue(ExifTag.Software, "unit_test");
// act
using var memStream = new MemoryStream();
using MemoryStream memStream = new();
input.Save(memStream, JpegEncoder);
// assert
memStream.Position = 0;
using var output = Image.Load<Rgba32>(memStream);
using Image<Rgba32> output = Image.Load<Rgba32>(memStream);
ExifProfile actual = output.Metadata.ExifProfile;
Assert.NotNull(actual);
IReadOnlyList<IExifValue> values = input.Metadata.ExifProfile.Values;
@ -76,16 +76,16 @@ public partial class JpegEncoderTests
public void Encode_PreservesIccProfile()
{
// arrange
using var input = new Image<Rgba32>(1, 1);
using Image<Rgba32> input = new(1, 1);
input.Metadata.IccProfile = new IccProfile(IccTestDataProfiles.Profile_Random_Array);
// act
using var memStream = new MemoryStream();
using MemoryStream memStream = new();
input.Save(memStream, JpegEncoder);
// assert
memStream.Position = 0;
using var output = Image.Load<Rgba32>(memStream);
using Image<Rgba32> output = Image.Load<Rgba32>(memStream);
IccProfile actual = output.Metadata.IccProfile;
Assert.NotNull(actual);
IccProfile values = input.Metadata.IccProfile;
@ -99,12 +99,10 @@ public partial class JpegEncoderTests
{
Exception ex = Record.Exception(() =>
{
var encoder = new JpegEncoder();
using (var stream = new MemoryStream())
{
using Image<TPixel> image = provider.GetImage(JpegDecoder.Instance);
image.Save(stream, encoder);
}
JpegEncoder encoder = new();
using MemoryStream stream = new();
using Image<TPixel> image = provider.GetImage(JpegDecoder.Instance);
image.Save(stream, encoder);
});
Assert.Null(ex);
@ -114,44 +112,99 @@ public partial class JpegEncoderTests
[MemberData(nameof(RatioFiles))]
public void Encode_PreserveRatio(string imagePath, int xResolution, int yResolution, PixelResolutionUnit resolutionUnit)
{
var testFile = TestFile.Create(imagePath);
using (Image<Rgba32> input = testFile.CreateRgba32Image())
{
using (var memStream = new MemoryStream())
{
input.Save(memStream, JpegEncoder);
memStream.Position = 0;
using (var output = Image.Load<Rgba32>(memStream))
{
ImageMetadata meta = output.Metadata;
Assert.Equal(xResolution, meta.HorizontalResolution);
Assert.Equal(yResolution, meta.VerticalResolution);
Assert.Equal(resolutionUnit, meta.ResolutionUnits);
}
}
}
TestFile testFile = TestFile.Create(imagePath);
using Image<Rgba32> input = testFile.CreateRgba32Image();
using MemoryStream memStream = new();
input.Save(memStream, JpegEncoder);
memStream.Position = 0;
using Image<Rgba32> output = Image.Load<Rgba32>(memStream);
ImageMetadata meta = output.Metadata;
Assert.Equal(xResolution, meta.HorizontalResolution);
Assert.Equal(yResolution, meta.VerticalResolution);
Assert.Equal(resolutionUnit, meta.ResolutionUnits);
}
[Theory]
[MemberData(nameof(QualityFiles))]
public void Encode_PreservesQuality(string imagePath, int quality)
{
var testFile = TestFile.Create(imagePath);
using (Image<Rgba32> input = testFile.CreateRgba32Image())
{
using (var memStream = new MemoryStream())
{
input.Save(memStream, JpegEncoder);
memStream.Position = 0;
using (var output = Image.Load<Rgba32>(memStream))
{
JpegMetadata meta = output.Metadata.GetJpegMetadata();
Assert.Equal(quality, meta.Quality);
}
}
}
TestFile testFile = TestFile.Create(imagePath);
using Image<Rgba32> input = testFile.CreateRgba32Image();
using MemoryStream memStream = new();
input.Save(memStream, JpegEncoder);
memStream.Position = 0;
using Image<Rgba32> output = Image.Load<Rgba32>(memStream);
JpegMetadata meta = output.Metadata.GetJpegMetadata();
Assert.Equal(quality, meta.Quality);
}
[Theory]
[WithFile(TestImages.Jpeg.Issues.Issue2067_CommentMarker, PixelTypes.Rgba32)]
public void Encode_PreservesComments<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel>
{
// arrange
using Image<TPixel> input = provider.GetImage(JpegDecoder.Instance);
using MemoryStream memStream = new();
// act
input.Save(memStream, JpegEncoder);
// assert
memStream.Position = 0;
using Image<Rgba32> output = Image.Load<Rgba32>(memStream);
JpegMetadata actual = output.Metadata.GetJpegMetadata();
Assert.NotEmpty(actual.Comments);
Assert.Equal(1, actual.Comments.Count);
Assert.Equal("TEST COMMENT", actual.Comments[0].ToString());
}
[Fact]
public void Encode_SavesMultipleComments()
{
// arrange
using Image<Rgba32> input = new(1, 1);
JpegMetadata meta = input.Metadata.GetJpegMetadata();
using MemoryStream memStream = new();
// act
meta.Comments.Add(JpegComData.FromString("First comment"));
meta.Comments.Add(JpegComData.FromString("Second Comment"));
input.Save(memStream, JpegEncoder);
// assert
memStream.Position = 0;
using Image<Rgba32> output = Image.Load<Rgba32>(memStream);
JpegMetadata actual = output.Metadata.GetJpegMetadata();
Assert.NotEmpty(actual.Comments);
Assert.Equal(2, actual.Comments.Count);
Assert.Equal(meta.Comments[0].ToString(), actual.Comments[0].ToString());
Assert.Equal(meta.Comments[1].ToString(), actual.Comments[1].ToString());
}
// Verifies that a comment too long to be stored as one piece is not lost or truncated:
// after an encode/decode round trip it comes back as two comments whose concatenation
// equals the original text.
[Fact]
public void Encode_SaveTooLongComment()
{
// arrange
// 65534 characters: one more than the 65533 the assertions below expect in the first comment.
// NOTE(review): 65533 is presumably the payload limit of a single JPEG COM segment
// (16-bit length field that counts its own two bytes) - confirm against JpegEncoderCore.
string longString = new('c', 65534);
using Image<Rgba32> input = new(1, 1);
JpegMetadata meta = input.Metadata.GetJpegMetadata();
using MemoryStream memStream = new();
// act
meta.Comments.Add(JpegComData.FromString(longString));
input.Save(memStream, JpegEncoder);
// assert
// Rewind so the decoder reads the stream from the start.
memStream.Position = 0;
using Image<Rgba32> output = Image.Load<Rgba32>(memStream);
JpegMetadata actual = output.Metadata.GetJpegMetadata();
Assert.NotEmpty(actual.Comments);
// The single input comment is expected to come back as two: the first 65533 characters and the remaining one.
Assert.Equal(2, actual.Comments.Count);
Assert.Equal(longString[..65533], actual.Comments[0].ToString());
Assert.Equal("c", actual.Comments[1].ToString());
}
[Theory]
@ -164,14 +217,14 @@ public partial class JpegEncoderTests
{
// arrange
using Image<TPixel> input = provider.GetImage(JpegDecoder.Instance);
using var memoryStream = new MemoryStream();
using MemoryStream memoryStream = new();
// act
input.Save(memoryStream, JpegEncoder);
// assert
memoryStream.Position = 0;
using var output = Image.Load<Rgba32>(memoryStream);
using Image<Rgba32> output = Image.Load<Rgba32>(memoryStream);
JpegMetadata meta = output.Metadata.GetJpegMetadata();
Assert.Equal(expectedColorType, meta.ColorType);
}

22
tests/ImageSharp.Tests/Formats/Jpg/JpegMetadataTests.cs

@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Collections.ObjectModel;
using SixLabors.ImageSharp.Formats.Jpeg;
namespace SixLabors.ImageSharp.Tests.Formats.Jpg;
@ -57,4 +58,25 @@ public class JpegMetadataTests
Assert.Equal(meta.Quality, qualityLuma);
}
// A freshly constructed JpegMetadata must expose an empty comment collection.
[Fact]
public void Comment_EmptyComment()
{
    JpegMetadata meta = new();

    // Assert.Empty reports the unexpected elements on failure, unlike
    // Assert.True(...SequenceEqual(...)) which only reports "False".
    Assert.Empty(meta.Comments);
}
// Adding one comment must yield a collection holding exactly that comment's text.
[Fact]
public void Comment_OnlyComment()
{
    string comment = "test comment";
    var expectedCollection = new Collection<string> { comment };

    var meta = new JpegMetadata();
    meta.Comments.Add(JpegComData.FromString(comment));

    // Assert.Single verifies the count and returns the only element; Assert.Equal
    // then shows both strings on failure, unlike Assert.True(a == b).
    JpegComData actual = Assert.Single(meta.Comments);
    Assert.Equal(expectedCollection[0], actual.ToString());
}
}

26
tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs

@ -665,4 +665,30 @@ public partial class PngDecoderTests
Assert.True(eofHitCounter.EofHitCount <= 3);
Assert.Equal(new Size(200, 120), eofHitCounter.Image.Size);
}
[Fact]
public void Decode_Issue2666()
{
string path = Path.GetFullPath(Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, TestImages.Png.Issue2666));
using Image image = Image.Load(path);
}
[Theory]
[InlineData(TestImages.Png.Bad.BadZTXT)]
[InlineData(TestImages.Png.Bad.BadZTXT2)]
public void Decode_BadZTXT(string file)
{
string path = Path.GetFullPath(Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, file));
using Image image = Image.Load(path);
}
[Theory]
[InlineData(TestImages.Png.Bad.BadZTXT)]
[InlineData(TestImages.Png.Bad.BadZTXT2)]
public void Info_BadZTXT(string file)
{
string path = Path.GetFullPath(Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, file));
_ = Image.Identify(path);
}
}

17
tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs

@ -8,6 +8,7 @@ using SixLabors.ImageSharp.Formats.Png;
using SixLabors.ImageSharp.Formats.Webp;
using SixLabors.ImageSharp.Metadata;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.Processing.Processors.Quantization;
using SixLabors.ImageSharp.Tests.TestUtilities;
using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison;
@ -679,6 +680,22 @@ public partial class PngEncoderTests
encoded.CompareToReferenceOutput(ImageComparer.Exact, provider);
}
// https://github.com/SixLabors/ImageSharp/issues/2668
[Theory]
[WithFile(TestImages.Png.Issue2668, PixelTypes.Rgba32)]
public void Issue2668_Quantized_Encode_Alpha<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel>
{
using Image<TPixel> image = provider.GetImage(PngDecoder.Instance);
image.Mutate(x => x.Resize(100, 100));
PngEncoder encoder = new() { BitDepth = PngBitDepth.Bit8, ColorType = PngColorType.Palette };
string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "png", encoder);
using Image<Rgba32> encoded = Image.Load<Rgba32>(actualOutputFile);
encoded.CompareToReferenceOutput(ImageComparer.Exact, provider);
}
private static void TestPngEncoderCore<TPixel>(
TestImageProvider<TPixel> provider,
PngColorType pngColorType,

11
tests/ImageSharp.Tests/Formats/WebP/WebpDecoderTests.cs

@ -439,6 +439,17 @@ public class WebpDecoderTests
image.CompareToOriginal(provider, ReferenceDecoder);
}
// https://github.com/SixLabors/ImageSharp/issues/2670
[Theory]
[WithFile(Lossy.Issue2670, PixelTypes.Rgba32)]
public void WebpDecoder_CanDecode_Issue2670<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel>
{
using Image<TPixel> image = provider.GetImage(WebpDecoder.Instance);
image.DebugSave(provider);
image.CompareToOriginal(provider, ReferenceDecoder);
}
[Theory]
[WithFile(Lossless.LossLessCorruptImage3, PixelTypes.Rgba32)]
public void WebpDecoder_ThrowImageFormatException_OnInvalidImages<TPixel>(TestImageProvider<TPixel> provider)

10
tests/ImageSharp.Tests/TestImages.cs

@ -73,6 +73,7 @@ public static class TestImages
public const string DisposeBackgroundRegion = "Png/animated/15-dispose-background-region.png";
public const string DisposePreviousFirst = "Png/animated/12-dispose-prev-first.png";
public const string BlendOverMultiple = "Png/animated/21-blend-over-multiple.png";
public const string Issue2666 = "Png/issues/Issue_2666.png";
// Filtered test images from http://www.schaik.com/pngsuite/pngsuite_fil_png.html
public const string Filter0 = "Png/filter0.png";
@ -150,6 +151,9 @@ public static class TestImages
// Issue 2447: https://github.com/SixLabors/ImageSharp/issues/2447
public const string Issue2447 = "Png/issues/issue_2447.png";
// Issue 2668: https://github.com/SixLabors/ImageSharp/issues/2668
public const string Issue2668 = "Png/issues/Issue_2668.png";
public static class Bad
{
public const string MissingDataChunk = "Png/xdtn0g01.png";
@ -182,8 +186,10 @@ public static class TestImages
// Invalid color type.
public const string ColorTypeOne = "Png/xc1n0g08.png";
public const string ColorTypeNine = "Png/xc9n2c08.png";
public const string FlagOfGermany0000016446 = "Png/issues/flag_of_germany-0000016446.png";
public const string BadZTXT = "Png/issues/bad-ztxt.png";
public const string BadZTXT2 = "Png/issues/bad-ztxt2.png";
}
}
@ -309,6 +315,7 @@ public static class TestImages
public const string Issue2564 = "Jpg/issues/issue-2564.jpg";
public const string HangBadScan = "Jpg/issues/Hang_C438A851.jpg";
public const string Issue2517 = "Jpg/issues/issue2517-bad-d7.jpg";
public const string Issue2067_CommentMarker = "Jpg/issues/issue-2067-comment.jpg";
public static class Fuzz
{
@ -805,6 +812,7 @@ public static class TestImages
public const string Issue1594 = "Webp/issues/Issue1594.webp";
public const string Issue2243 = "Webp/issues/Issue2243.webp";
public const string Issue2257 = "Webp/issues/Issue2257.webp";
public const string Issue2670 = "Webp/issues/Issue2670.webp";
}
}

22
tests/ImageSharp.Tests/TestUtilities/BasicSerializer.cs

@ -13,14 +13,14 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities;
/// </summary>
internal class BasicSerializer : IXunitSerializationInfo
{
private readonly Dictionary<string, string> map = new Dictionary<string, string>();
private readonly Dictionary<string, string> map = [];
public const char Separator = ':';
private string DumpToString(Type type)
{
using var ms = new MemoryStream();
using var writer = new StreamWriter(ms);
using MemoryStream ms = new();
using StreamWriter writer = new(ms);
writer.WriteLine(type.FullName);
foreach (KeyValuePair<string, string> kv in this.map)
{
@ -29,16 +29,16 @@ internal class BasicSerializer : IXunitSerializationInfo
writer.Flush();
byte[] data = ms.ToArray();
return System.Convert.ToBase64String(data);
return Convert.ToBase64String(data);
}
private Type LoadDump(string dump)
{
byte[] data = System.Convert.FromBase64String(dump);
byte[] data = Convert.FromBase64String(dump);
using var ms = new MemoryStream(data);
using var reader = new StreamReader(ms);
var type = Type.GetType(reader.ReadLine());
using MemoryStream ms = new(data);
using StreamReader reader = new(ms);
Type type = Type.GetType(reader.ReadLine());
for (string s = reader.ReadLine(); s != null; s = reader.ReadLine())
{
string[] kv = s.Split(Separator);
@ -50,7 +50,7 @@ internal class BasicSerializer : IXunitSerializationInfo
public static string Serialize(IXunitSerializable serializable)
{
var serializer = new BasicSerializer();
BasicSerializer serializer = new();
serializable.Serialize(serializer);
return serializer.DumpToString(serializable.GetType());
}
@ -58,10 +58,10 @@ internal class BasicSerializer : IXunitSerializationInfo
public static T Deserialize<T>(string dump)
where T : IXunitSerializable
{
var serializer = new BasicSerializer();
BasicSerializer serializer = new();
Type type = serializer.LoadDump(dump);
var result = (T)Activator.CreateInstance(type);
T result = (T)Activator.CreateInstance(type);
result.Deserialize(serializer);
return result;
}

81
tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs

@ -2,6 +2,7 @@
// Licensed under the Six Labors Split License.
using System.Diagnostics;
using System.Globalization;
using Microsoft.DotNet.RemoteExecutor;
using Xunit.Abstractions;
@ -12,7 +13,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities;
/// </summary>
public static class FeatureTestRunner
{
private static readonly char[] SplitChars = { ',', ' ' };
private static readonly char[] SplitChars = [',', ' '];
/// <summary>
/// Allows the deserialization of parameters passed to the feature test.
@ -40,7 +41,7 @@ public static class FeatureTestRunner
/// <returns>The <typeparamref name="T"/> value.</returns>
public static T Deserialize<T>(string value)
where T : IConvertible
=> (T)Convert.ChangeType(value, typeof(T));
=> (T)Convert.ChangeType(value, typeof(T), CultureInfo.InvariantCulture);
/// <summary>
/// Runs the given test <paramref name="action"/> within an environment
@ -127,6 +128,7 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
/// <param name="serializable">The value to pass as a parameter to the test action.</param>
@ -170,6 +172,7 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
/// <param name="serializable">The value to pass as a parameter to the test action.</param>
@ -214,6 +217,8 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <typeparam name="T2">The additional type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
/// <param name="arg1">The value to pass as a parameter to the test action.</param>
@ -261,6 +266,7 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
/// <param name="arg1">The value to pass as a parameter to the test action.</param>
@ -307,6 +313,7 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="serializable">The value to pass as a parameter to the test action.</param>
/// <param name="intrinsics">The intrinsics features.</param>
@ -350,6 +357,7 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="arg0">The value to pass as a parameter #0 to the test action.</param>
/// <param name="arg1">The value to pass as a parameter #1 to the test action.</param>
@ -395,10 +403,10 @@ public static class FeatureTestRunner
internal static Dictionary<HwIntrinsics, string> ToFeatureKeyValueCollection(this HwIntrinsics intrinsics)
{
// Loop through and translate the given values into COMPlus equivalents
Dictionary<HwIntrinsics, string> features = new();
Dictionary<HwIntrinsics, string> features = [];
foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries))
{
HwIntrinsics key = (HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic);
HwIntrinsics key = Enum.Parse<HwIntrinsics>(intrinsic);
switch (intrinsic)
{
case nameof(HwIntrinsics.AllowAll):
@ -418,40 +426,47 @@ public static class FeatureTestRunner
}
/// <summary>
/// See <see href="https://github.com/dotnet/runtime/blob/50ac454d8d8a1915188b2a4bb3fff3b81bf6c0cf/src/coreclr/src/jit/jitconfigvalues.h#L224"/>
/// <remarks>
/// <see cref="DisableSIMD"/> ends up impacting all SIMD support(including System.Numerics)
/// but not things like <see cref="DisableBMI1"/>, <see cref="DisableBMI2"/>, and <see cref="DisableLZCNT"/>.
/// </remarks>
/// See <see href="https://github.com/dotnet/runtime/blob/58601ba7da092fe82bb71d087d30df95472968b6/src/coreclr/jit/jitconfigvalues.h#L315"/>
/// </summary>
[Flags]
#pragma warning disable RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute).
public enum HwIntrinsics
public enum HwIntrinsics : long
#pragma warning restore RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute).
{
// Use flags so we can pass multiple values without using params.
// Don't base on 0 or use inverse for All as that doesn't translate to string values.
DisableHWIntrinsic = 1 << 0,
DisableSSE = 1 << 1,
DisableSSE2 = 1 << 2,
DisableAES = 1 << 3,
DisablePCLMULQDQ = 1 << 4,
DisableSSE3 = 1 << 5,
DisableSSSE3 = 1 << 6,
DisableSSE41 = 1 << 7,
DisableSSE42 = 1 << 8,
DisablePOPCNT = 1 << 9,
DisableAVX = 1 << 10,
DisableFMA = 1 << 11,
DisableAVX2 = 1 << 12,
DisableBMI1 = 1 << 13,
DisableBMI2 = 1 << 14,
DisableLZCNT = 1 << 15,
DisableArm64AdvSimd = 1 << 16,
DisableArm64Crc32 = 1 << 17,
DisableArm64Dp = 1 << 18,
DisableArm64Aes = 1 << 19,
DisableArm64Sha1 = 1 << 20,
DisableArm64Sha256 = 1 << 21,
AllowAll = 1 << 22
DisableHWIntrinsic = 1L << 0,
DisableSSE = 1L << 1,
DisableSSE2 = 1L << 2,
DisableAES = 1L << 3,
DisablePCLMULQDQ = 1L << 4,
DisableSSE3 = 1L << 5,
DisableSSSE3 = 1L << 6,
DisableSSE41 = 1L << 7,
DisableSSE42 = 1L << 8,
DisablePOPCNT = 1L << 9,
DisableAVX = 1L << 10,
DisableFMA = 1L << 11,
DisableAVX2 = 1L << 12,
DisableAVXVNNI = 1L << 13,
DisableAVX512BW = 1L << 14,
DisableAVX512BW_VL = 1L << 15,
DisableAVX512CD = 1L << 16,
DisableAVX512CD_VL = 1L << 17,
DisableAVX512DQ = 1L << 18,
DisableAVX512DQ_VL = 1L << 19,
DisableAVX512F = 1L << 20,
DisableAVX512F_VL = 1L << 21,
DisableAVX512VBMI = 1L << 22,
DisableAVX512VBMI_VL = 1L << 23,
DisableBMI1 = 1L << 24,
DisableBMI2 = 1L << 25,
DisableLZCNT = 1L << 26,
DisableArm64AdvSimd = 1L << 27,
DisableArm64Crc32 = 1L << 28,
DisableArm64Dp = 1L << 29,
DisableArm64Aes = 1L << 30,
DisableArm64Sha1 = 1L << 31,
DisableArm64Sha256 = 1L << 32,
AllowAll = 1L << 33
}

3
tests/Images/External/ReferenceOutput/PngEncoderTests/Issue2668_Quantized_Encode_Alpha_Rgba32_Issue_2668.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f934af128b85b9e8f557d71ac8b1f1473a0922d0754fc0c4ece0d0e3d8d94c39
size 7702

3
tests/Images/Input/Jpg/issues/issue-2067-comment.jpg

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d87b5429adeffcfac535aa8af2ec9801bf6c965a2e6751cfec4f8534195ba8f4
size 21082

3
tests/Images/Input/Png/issues/Issue_2666.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ed7665cdfd5fad00c5995040350a254b96af6c0c95ab13975f2291e9d3fce0f3
size 8244837

3
tests/Images/Input/Png/issues/Issue_2668.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e8e5b2b933fd8fefd161f1d22970cb60247fd2d93b6c07b8b9ee1fdbc2241a3c
size 390225

3
tests/Images/Input/Png/issues/bad-ztxt.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:132a70cf0ac458a55cf4a44f4c6c025587491d304595835959955de6682fa472
size 3913750

3
tests/Images/Input/Png/issues/bad-ztxt2.png

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:778a5fc8e915d79e9f55e58c6e4f646ae55dd7e866e65960754cb67a2b445987
size 93

3
tests/Images/Input/Webp/issues/Issue2670.webp

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:23ad5eb449f693af68e51dd108a6b9847a8eb48b82ca5b848395a54c2e0be08f
size 152
Loading…
Cancel
Save