Browse Source

Merge remote-tracking branch 'upstream/master' into bigtiff

pull/1760/head
Ildar Khayrutdinov 5 years ago
parent
commit
7be76f595b
  1. 5
      ImageSharp.sln
  2. 11
      codecov.yml
  3. 2
      shared-infrastructure
  4. 150
      src/ImageSharp/Color/Color.Conversions.cs
  5. 105
      src/ImageSharp/Color/Color.cs
  6. 4
      src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
  7. 2
      src/ImageSharp/Formats/Png/PngDecoderCore.cs
  8. 4
      src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs
  9. 12
      src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs
  10. 10
      src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs
  11. 24
      src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs
  12. 67
      src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs
  13. 21
      src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs
  14. 22
      src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs
  15. 1
      src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs
  16. 10
      src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
  17. 8
      src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
  18. 268
      src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs
  19. 2
      src/ImageSharp/Formats/Webp/Lossless/CostModel.cs
  20. 43
      src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
  21. 9
      src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
  22. 9
      src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
  23. 510
      src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
  24. 6
      src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
  25. 321
      src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
  26. 37
      src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
  27. 67
      src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs
  28. 9
      src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs
  29. 3
      src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs
  30. 382
      src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
  31. 292
      src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs
  32. 60
      src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs
  33. 32
      src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs
  34. 366
      src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
  35. 22
      src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs
  36. 38
      src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs
  37. 11
      src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs
  38. 7
      src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs
  39. 14
      src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs
  40. 54
      src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs
  41. 31
      src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs
  42. 56
      src/ImageSharp/Formats/Webp/WebpCommonUtils.cs
  43. 2
      src/ImageSharp/Formats/Webp/WebpDecoderCore.cs
  44. 2
      src/ImageSharp/Formats/Webp/WebpEncoder.cs
  45. 2
      src/ImageSharp/Formats/Webp/WebpEncoderCore.cs
  46. 267
      src/ImageSharp/Formats/Webp/WebpLookupTables.cs
  47. 4
      src/ImageSharp/IO/ChunkedMemoryStream.cs
  48. 51
      src/ImageSharp/Image.Decode.cs
  49. 47
      src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs
  50. 2
      src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs
  51. 49
      tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs
  52. 2
      tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs
  53. 101
      tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs
  54. 4
      tests/ImageSharp.Benchmarks/Config.cs
  55. 1
      tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj
  56. 7
      tests/ImageSharp.Benchmarks/benchmark.sh
  57. 26
      tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs
  58. 92
      tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs
  59. 94
      tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
  60. 122
      tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs
  61. 12
      tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs
  62. 53
      tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs
  63. 98
      tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs
  64. 25
      tests/ImageSharp.Tests/Formats/WebP/WebpMetaDataTests.cs

5
ImageSharp.sln

@ -1,7 +1,7 @@
 
Microsoft Visual Studio Solution File, Format Version 12.00 Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16 # Visual Studio Version 17
VisualStudioVersion = 16.0.28902.138 VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1 MinimumVisualStudioVersion = 10.0.40219.1
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "_root", "_root", "{C317F1B1-D75E-4C6D-83EB-80367343E0D7}" Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "_root", "_root", "{C317F1B1-D75E-4C6D-83EB-80367343E0D7}"
ProjectSection(SolutionItems) = preProject ProjectSection(SolutionItems) = preProject
@ -13,6 +13,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "_root", "_root", "{C317F1B1
ci-build.ps1 = ci-build.ps1 ci-build.ps1 = ci-build.ps1
ci-pack.ps1 = ci-pack.ps1 ci-pack.ps1 = ci-pack.ps1
ci-test.ps1 = ci-test.ps1 ci-test.ps1 = ci-test.ps1
codecov.yml = codecov.yml
Directory.Build.props = Directory.Build.props Directory.Build.props = Directory.Build.props
Directory.Build.targets = Directory.Build.targets Directory.Build.targets = Directory.Build.targets
LICENSE = LICENSE LICENSE = LICENSE

11
codecov.yml

@ -9,3 +9,14 @@ codecov:
# Avoid Report Expired # Avoid Report Expired
# https://docs.codecov.io/docs/codecov-yaml#section-expired-reports # https://docs.codecov.io/docs/codecov-yaml#section-expired-reports
max_report_age: off max_report_age: off
coverage:
# Use integer precision
# https://docs.codecov.com/docs/codecovyml-reference#coverageprecision
precision: 0
# Explicitly control coverage status checks
# https://docs.codecov.com/docs/commit-status#disabling-a-status
status:
project: on
patch: off

2
shared-infrastructure

@ -1 +1 @@
Subproject commit a042aba176cdb840d800c6ed4cfe41a54fb7b1e3 Subproject commit 33cb12ca77f919b44de56f344d2627cc2a108c3a

150
src/ImageSharp/Color/Color.Conversions.cs

@ -17,56 +17,118 @@ namespace SixLabors.ImageSharp
/// </summary> /// </summary>
/// <param name="pixel">The <see cref="Rgba64"/> containing the color information.</param> /// <param name="pixel">The <see cref="Rgba64"/> containing the color information.</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public Color(Rgba64 pixel) => this.data = pixel; public Color(Rgba64 pixel)
{
this.data = pixel;
this.boxedHighPrecisionPixel = null;
}
/// <summary>
/// Initializes a new instance of the <see cref="Color"/> struct
/// from a <see cref="Rgb48"/> pixel. The alpha component is set to <see cref="ushort.MaxValue"/>.
/// </summary>
/// <param name="pixel">The <see cref="Rgb48"/> containing the color information.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public Color(Rgb48 pixel)
{
    // The value is stored in the Rgba64 field, so no boxed pixel is kept.
    this.boxedHighPrecisionPixel = null;
    this.data = new Rgba64(pixel.R, pixel.G, pixel.B, ushort.MaxValue);
}
/// <summary>
/// Initializes a new instance of the <see cref="Color"/> struct
/// from a <see cref="La32"/> pixel. The luminance is copied to the red, green and blue components.
/// </summary>
/// <param name="pixel">The <see cref="La32"/> containing the color information.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public Color(La32 pixel)
{
    var luminance = pixel.L;
    var alpha = pixel.A;

    // The value is stored in the Rgba64 field, so no boxed pixel is kept.
    this.boxedHighPrecisionPixel = null;
    this.data = new Rgba64(luminance, luminance, luminance, alpha);
}
/// <summary>
/// Initializes a new instance of the <see cref="Color"/> struct
/// from a <see cref="L16"/> pixel. The packed value is copied to the red, green and blue components
/// and the alpha component is set to <see cref="ushort.MaxValue"/>.
/// </summary>
/// <param name="pixel">The <see cref="L16"/> containing the color information.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public Color(L16 pixel)
{
    var luminance = pixel.PackedValue;

    // The value is stored in the Rgba64 field, so no boxed pixel is kept.
    this.boxedHighPrecisionPixel = null;
    this.data = new Rgba64(luminance, luminance, luminance, ushort.MaxValue);
}
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="Color"/> struct. /// Initializes a new instance of the <see cref="Color"/> struct.
/// </summary> /// </summary>
/// <param name="pixel">The <see cref="Rgba32"/> containing the color information.</param> /// <param name="pixel">The <see cref="Rgba32"/> containing the color information.</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public Color(Rgba32 pixel) => this.data = new Rgba64(pixel); public Color(Rgba32 pixel)
{
this.data = new Rgba64(pixel);
this.boxedHighPrecisionPixel = null;
}
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="Color"/> struct. /// Initializes a new instance of the <see cref="Color"/> struct.
/// </summary> /// </summary>
/// <param name="pixel">The <see cref="Argb32"/> containing the color information.</param> /// <param name="pixel">The <see cref="Argb32"/> containing the color information.</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public Color(Argb32 pixel) => this.data = new Rgba64(pixel); public Color(Argb32 pixel)
{
this.data = new Rgba64(pixel);
this.boxedHighPrecisionPixel = null;
}
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="Color"/> struct. /// Initializes a new instance of the <see cref="Color"/> struct.
/// </summary> /// </summary>
/// <param name="pixel">The <see cref="Bgra32"/> containing the color information.</param> /// <param name="pixel">The <see cref="Bgra32"/> containing the color information.</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public Color(Bgra32 pixel) => this.data = new Rgba64(pixel); public Color(Bgra32 pixel)
{
this.data = new Rgba64(pixel);
this.boxedHighPrecisionPixel = null;
}
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="Color"/> struct. /// Initializes a new instance of the <see cref="Color"/> struct.
/// </summary> /// </summary>
/// <param name="pixel">The <see cref="Rgb24"/> containing the color information.</param> /// <param name="pixel">The <see cref="Rgb24"/> containing the color information.</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public Color(Rgb24 pixel) => this.data = new Rgba64(pixel); public Color(Rgb24 pixel)
{
this.data = new Rgba64(pixel);
this.boxedHighPrecisionPixel = null;
}
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="Color"/> struct. /// Initializes a new instance of the <see cref="Color"/> struct.
/// </summary> /// </summary>
/// <param name="pixel">The <see cref="Bgr24"/> containing the color information.</param> /// <param name="pixel">The <see cref="Bgr24"/> containing the color information.</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public Color(Bgr24 pixel) => this.data = new Rgba64(pixel); public Color(Bgr24 pixel)
{
this.data = new Rgba64(pixel);
this.boxedHighPrecisionPixel = null;
}
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="Color"/> struct. /// Initializes a new instance of the <see cref="Color"/> struct.
/// </summary> /// </summary>
/// <param name="vector">The <see cref="Vector4"/> containing the color information.</param> /// <param name="vector">The <see cref="Vector4"/> containing the color information.</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public Color(Vector4 vector) => this.data = new Rgba64(vector); public Color(Vector4 vector)
{
vector = Numerics.Clamp(vector, Vector4.Zero, Vector4.One);
this.boxedHighPrecisionPixel = new RgbaVector(vector.X, vector.Y, vector.Z, vector.W);
this.data = default;
}
/// <summary> /// <summary>
/// Converts a <see cref="Color"/> to <see cref="Vector4"/>. /// Converts a <see cref="Color"/> to <see cref="Vector4"/>.
/// </summary> /// </summary>
/// <param name="color">The <see cref="Color"/>.</param> /// <param name="color">The <see cref="Color"/>.</param>
/// <returns>The <see cref="Vector4"/>.</returns> /// <returns>The <see cref="Vector4"/>.</returns>
public static explicit operator Vector4(Color color) => color.data.ToVector4(); public static explicit operator Vector4(Color color) => color.ToVector4();
/// <summary> /// <summary>
/// Converts a <see cref="Vector4"/> to <see cref="Color"/>. /// Converts a <see cref="Vector4"/> to <see cref="Color"/>.
@ -74,24 +136,82 @@ namespace SixLabors.ImageSharp
/// <param name="source">The <see cref="Vector4"/>.</param> /// <param name="source">The <see cref="Vector4"/>.</param>
/// <returns>The <see cref="Color"/>.</returns> /// <returns>The <see cref="Color"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static explicit operator Color(Vector4 source) => new Color(source); public static explicit operator Color(Vector4 source) => new(source);
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal Rgba32 ToRgba32() => this.data.ToRgba32(); internal Rgba32 ToRgba32()
{
if (this.boxedHighPrecisionPixel is null)
{
return this.data.ToRgba32();
}
Rgba32 value = default;
this.boxedHighPrecisionPixel.ToRgba32(ref value);
return value;
}
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal Bgra32 ToBgra32() => this.data.ToBgra32(); internal Bgra32 ToBgra32()
{
if (this.boxedHighPrecisionPixel is null)
{
return this.data.ToBgra32();
}
Bgra32 value = default;
value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
return value;
}
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal Argb32 ToArgb32() => this.data.ToArgb32(); internal Argb32 ToArgb32()
{
if (this.boxedHighPrecisionPixel is null)
{
return this.data.ToArgb32();
}
Argb32 value = default;
value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
return value;
}
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal Rgb24 ToRgb24() => this.data.ToRgb24(); internal Rgb24 ToRgb24()
{
if (this.boxedHighPrecisionPixel is null)
{
return this.data.ToRgb24();
}
Rgb24 value = default;
value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
return value;
}
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal Bgr24 ToBgr24() => this.data.ToBgr24(); internal Bgr24 ToBgr24()
{
if (this.boxedHighPrecisionPixel is null)
{
return this.data.ToBgr24();
}
Bgr24 value = default;
value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
return value;
}
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal Vector4 ToVector4() => this.data.ToVector4(); internal Vector4 ToVector4()
{
if (this.boxedHighPrecisionPixel is null)
{
return this.data.ToScaledVector4();
}
return this.boxedHighPrecisionPixel.ToScaledVector4();
}
} }
} }

105
src/ImageSharp/Color/Color.cs

@ -4,7 +4,6 @@
using System; using System;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp namespace SixLabors.ImageSharp
@ -21,6 +20,7 @@ namespace SixLabors.ImageSharp
public readonly partial struct Color : IEquatable<Color> public readonly partial struct Color : IEquatable<Color>
{ {
private readonly Rgba64 data; private readonly Rgba64 data;
private readonly IPixel boxedHighPrecisionPixel;
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private Color(byte r, byte g, byte b, byte a) private Color(byte r, byte g, byte b, byte a)
@ -30,6 +30,8 @@ namespace SixLabors.ImageSharp
ColorNumerics.UpscaleFrom8BitTo16Bit(g), ColorNumerics.UpscaleFrom8BitTo16Bit(g),
ColorNumerics.UpscaleFrom8BitTo16Bit(b), ColorNumerics.UpscaleFrom8BitTo16Bit(b),
ColorNumerics.UpscaleFrom8BitTo16Bit(a)); ColorNumerics.UpscaleFrom8BitTo16Bit(a));
this.boxedHighPrecisionPixel = null;
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
@ -40,6 +42,15 @@ namespace SixLabors.ImageSharp
ColorNumerics.UpscaleFrom8BitTo16Bit(g), ColorNumerics.UpscaleFrom8BitTo16Bit(g),
ColorNumerics.UpscaleFrom8BitTo16Bit(b), ColorNumerics.UpscaleFrom8BitTo16Bit(b),
ushort.MaxValue); ushort.MaxValue);
this.boxedHighPrecisionPixel = null;
}
[MethodImpl(InliningOptions.ShortMethod)]
private Color(IPixel pixel)
{
this.boxedHighPrecisionPixel = pixel;
this.data = default;
} }
/// <summary> /// <summary>
@ -52,13 +63,10 @@ namespace SixLabors.ImageSharp
/// otherwise, false. /// otherwise, false.
/// </returns> /// </returns>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static bool operator ==(Color left, Color right) public static bool operator ==(Color left, Color right) => left.Equals(right);
{
return left.Equals(right);
}
/// <summary> /// <summary>
/// Checks whether two <see cref="Color"/> structures are equal. /// Checks whether two <see cref="Color"/> structures are not equal.
/// </summary> /// </summary>
/// <param name="left">The left hand <see cref="Color"/> operand.</param> /// <param name="left">The left hand <see cref="Color"/> operand.</param>
/// <param name="right">The right hand <see cref="Color"/> operand.</param> /// <param name="right">The right hand <see cref="Color"/> operand.</param>
@ -67,10 +75,7 @@ namespace SixLabors.ImageSharp
/// otherwise, false. /// otherwise, false.
/// </returns> /// </returns>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static bool operator !=(Color left, Color right) public static bool operator !=(Color left, Color right) => !left.Equals(right);
{
return !left.Equals(right);
}
/// <summary> /// <summary>
/// Creates a <see cref="Color"/> from RGBA bytes. /// Creates a <see cref="Color"/> from RGBA bytes.
@ -81,7 +86,7 @@ namespace SixLabors.ImageSharp
/// <param name="a">The alpha component (0-255).</param> /// <param name="a">The alpha component (0-255).</param>
/// <returns>The <see cref="Color"/>.</returns> /// <returns>The <see cref="Color"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static Color FromRgba(byte r, byte g, byte b, byte a) => new Color(r, g, b, a); public static Color FromRgba(byte r, byte g, byte b, byte a) => new(r, g, b, a);
/// <summary> /// <summary>
/// Creates a <see cref="Color"/> from RGB bytes. /// Creates a <see cref="Color"/> from RGB bytes.
@ -91,7 +96,46 @@ namespace SixLabors.ImageSharp
/// <param name="b">The blue component (0-255).</param> /// <param name="b">The blue component (0-255).</param>
/// <returns>The <see cref="Color"/>.</returns> /// <returns>The <see cref="Color"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static Color FromRgb(byte r, byte g, byte b) => new Color(r, g, b); public static Color FromRgb(byte r, byte g, byte b) => new(r, g, b);
/// <summary>
/// Creates a <see cref="Color"/> from the given <typeparamref name="TPixel"/>.
/// </summary>
/// <param name="pixel">The pixel to convert from.</param>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <returns>The <see cref="Color"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static Color FromPixel<TPixel>(TPixel pixel)
    where TPixel : unmanaged, IPixel<TPixel>
{
    // Avoid boxing when the pixel can be converted to Rgba64 safely and efficiently.
    if (typeof(TPixel) == typeof(Rgba64))
    {
        return new Color((Rgba64)(object)pixel);
    }

    if (typeof(TPixel) == typeof(Rgb48))
    {
        return new Color((Rgb48)(object)pixel);
    }

    if (typeof(TPixel) == typeof(La32))
    {
        return new Color((La32)(object)pixel);
    }

    if (typeof(TPixel) == typeof(L16))
    {
        return new Color((L16)(object)pixel);
    }

    // Pixel formats no larger than Rgba32 are converted through Rgba32.
    if (Unsafe.SizeOf<TPixel>() <= Unsafe.SizeOf<Rgba32>())
    {
        Rgba32 rgba = default;
        pixel.ToRgba32(ref rgba);
        return new Color(rgba);
    }

    // Any remaining format is handed to the constructor as the pixel itself.
    return new Color(pixel);
}
/// <summary> /// <summary>
/// Creates a new instance of the <see cref="Color"/> struct /// Creates a new instance of the <see cref="Color"/> struct
@ -213,7 +257,7 @@ namespace SixLabors.ImageSharp
public override string ToString() => this.ToHex(); public override string ToString() => this.ToHex();
/// <summary> /// <summary>
/// Converts the color instance to a specified <see cref="IPixel{TSelf}"/> type. /// Converts the color instance to a specified <typeparamref name="TPixel"/> type.
/// </summary> /// </summary>
/// <typeparam name="TPixel">The pixel type to convert to.</typeparam> /// <typeparam name="TPixel">The pixel type to convert to.</typeparam>
/// <returns>The pixel value.</returns> /// <returns>The pixel value.</returns>
@ -221,13 +265,18 @@ namespace SixLabors.ImageSharp
public TPixel ToPixel<TPixel>() public TPixel ToPixel<TPixel>()
where TPixel : unmanaged, IPixel<TPixel> where TPixel : unmanaged, IPixel<TPixel>
{ {
TPixel pixel = default; if (this.boxedHighPrecisionPixel is TPixel pixel)
{
return pixel;
}
pixel = default;
pixel.FromRgba64(this.data); pixel.FromRgba64(this.data);
return pixel; return pixel;
} }
/// <summary> /// <summary>
/// Bulk converts a span of <see cref="Color"/> to a span of a specified <see cref="IPixel{TSelf}"/> type. /// Bulk converts a span of <see cref="Color"/> to a span of a specified <typeparamref name="TPixel"/> type.
/// </summary> /// </summary>
/// <typeparam name="TPixel">The pixel type to convert to.</typeparam> /// <typeparam name="TPixel">The pixel type to convert to.</typeparam>
/// <param name="configuration">The configuration.</param> /// <param name="configuration">The configuration.</param>
@ -240,28 +289,38 @@ namespace SixLabors.ImageSharp
Span<TPixel> destination) Span<TPixel> destination)
where TPixel : unmanaged, IPixel<TPixel> where TPixel : unmanaged, IPixel<TPixel>
{ {
ReadOnlySpan<Rgba64> rgba64Span = MemoryMarshal.Cast<Color, Rgba64>(source); Guard.DestinationShouldNotBeTooShort(source, destination, nameof(destination));
PixelOperations<TPixel>.Instance.FromRgba64(configuration, rgba64Span, destination); for (int i = 0; i < source.Length; i++)
{
destination[i] = source[i].ToPixel<TPixel>();
}
} }
/// <inheritdoc /> /// <inheritdoc />
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public bool Equals(Color other) public bool Equals(Color other)
{ {
return this.data.PackedValue == other.data.PackedValue; if (this.boxedHighPrecisionPixel is null && other.boxedHighPrecisionPixel is null)
{
return this.data.PackedValue == other.data.PackedValue;
}
return this.boxedHighPrecisionPixel?.Equals(other.boxedHighPrecisionPixel) == true;
} }
/// <inheritdoc /> /// <inheritdoc />
public override bool Equals(object obj) public override bool Equals(object obj) => obj is Color other && this.Equals(other);
{
return obj is Color other && this.Equals(other);
}
/// <inheritdoc /> /// <inheritdoc />
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public override int GetHashCode() public override int GetHashCode()
{ {
return this.data.PackedValue.GetHashCode(); if (this.boxedHighPrecisionPixel is null)
{
return this.data.PackedValue.GetHashCode();
}
return this.boxedHighPrecisionPixel.GetHashCode();
} }
} }
} }

4
src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs

@ -288,8 +288,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
/// <param name="componentCount">The number of components to write.</param> /// <param name="componentCount">The number of components to write.</param>
private void WriteDefineHuffmanTables(int componentCount) private void WriteDefineHuffmanTables(int componentCount)
{ {
// This uses a C# compiler optimization that refers to the static data segment of the assembly,
// and doesn't incur any allocation at all.
// Table identifiers. // Table identifiers.
ReadOnlySpan<byte> headers = stackalloc byte[] ReadOnlySpan<byte> headers = new byte[]
{ {
0x00, 0x00,
0x10, 0x10,

2
src/ImageSharp/Formats/Png/PngDecoderCore.cs

@ -1071,7 +1071,7 @@ namespace SixLabors.ImageSharp.Formats.Png
int bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length); int bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length);
while (bytesRead != 0) while (bytesRead != 0)
{ {
uncompressedBytes.AddRange(this.buffer.AsSpan().Slice(0, bytesRead).ToArray()); uncompressedBytes.AddRange(this.buffer.AsSpan(0, bytesRead).ToArray());
bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length); bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length);
} }

4
src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs

@ -256,8 +256,8 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Compressors
private void ResetTables() private void ResetTables()
{ {
this.children.GetSpan().Fill(0); this.children.GetSpan().Clear();
this.siblings.GetSpan().Fill(0); this.siblings.GetSpan().Clear();
this.bitsPerCode = MinBits; this.bitsPerCode = MinBits;
this.maxCode = MaxValue(this.bitsPerCode); this.maxCode = MaxValue(this.bitsPerCode);
this.nextValidCode = EoiCode + 1; this.nextValidCode = EoiCode + 1;

12
src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs

@ -64,7 +64,7 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors
uint bitsWritten = 0; uint bitsWritten = 0;
for (int y = 0; y < height; y++) for (int y = 0; y < height; y++)
{ {
scanLine.Fill(0); scanLine.Clear();
Decode2DScanline(bitReader, this.isWhiteZero, referenceScanLine, scanLine); Decode2DScanline(bitReader, this.isWhiteZero, referenceScanLine, scanLine);
bitsWritten = this.WriteScanLine(buffer, scanLine, bitsWritten); bitsWritten = this.WriteScanLine(buffer, scanLine, bitsWritten);
@ -116,7 +116,15 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors
{ {
// If a TIFF reader encounters EOFB before the expected number of lines has been extracted, // If a TIFF reader encounters EOFB before the expected number of lines has been extracted,
// it is appropriate to assume that the missing rows consist entirely of white pixels. // it is appropriate to assume that the missing rows consist entirely of white pixels.
scanline.Fill(whiteIsZero ? (byte)0 : (byte)255); if (whiteIsZero)
{
scanline.Clear();
}
else
{
scanline.Fill((byte)255);
}
break; break;
} }

10
src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs

@ -142,10 +142,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public bool ReadBool() => this.ReadValue(1) is 1; public bool ReadBool() => this.ReadValue(1) is 1;
[MethodImpl(InliningOptions.ShortMethod)]
public uint ReadValue(int nBits) public uint ReadValue(int nBits)
{ {
Guard.MustBeGreaterThan(nBits, 0, nameof(nBits)); DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits)); DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
uint v = 0; uint v = 0;
while (nBits-- > 0) while (nBits-- > 0)
@ -156,10 +157,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
return v; return v;
} }
[MethodImpl(InliningOptions.ShortMethod)]
public int ReadSignedValue(int nBits) public int ReadSignedValue(int nBits)
{ {
Guard.MustBeGreaterThan(nBits, 0, nameof(nBits)); DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits)); DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
int value = (int)this.ReadValue(nBits); int value = (int)this.ReadValue(nBits);
return this.ReadValue(1) != 0 ? -value : value; return this.ReadValue(1) != 0 ? -value : value;

24
src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs

@ -28,7 +28,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// </summary> /// </summary>
private const int Wbits = 32; private const int Wbits = 32;
private readonly uint[] bitMask = private static readonly uint[] BitMask =
{ {
0, 0,
0x000001, 0x000003, 0x000007, 0x00000f, 0x000001, 0x000003, 0x000007, 0x00000f,
@ -125,19 +125,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// </summary> /// </summary>
/// <param name="nBits">The number of bits to read (should not exceed 16).</param> /// <param name="nBits">The number of bits to read (should not exceed 16).</param>
/// <returns>A ushort value.</returns> /// <returns>A ushort value.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public uint ReadValue(int nBits) public uint ReadValue(int nBits)
{ {
Guard.MustBeGreaterThan(nBits, 0, nameof(nBits)); DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
if (!this.Eos && nBits <= Vp8LMaxNumBitRead) if (!this.Eos && nBits <= Vp8LMaxNumBitRead)
{ {
ulong val = this.PrefetchBits() & this.bitMask[nBits]; ulong val = this.PrefetchBits() & BitMask[nBits];
this.bitPos += nBits; this.bitPos += nBits;
this.ShiftBytes(); this.ShiftBytes();
return (uint)val; return (uint)val;
} }
this.SetEndOfStream();
return 0; return 0;
} }
@ -169,6 +169,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// <summary> /// <summary>
/// Advances the read buffer by 4 bytes to make room for reading next 32 bits. /// Advances the read buffer by 4 bytes to make room for reading next 32 bits.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public void FillBitWindow() public void FillBitWindow()
{ {
if (this.bitPos >= Wbits) if (this.bitPos >= Wbits)
@ -181,7 +182,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// Returns true if there was an attempt at reading bit past the end of the buffer. /// Returns true if there was an attempt at reading bit past the end of the buffer.
/// </summary> /// </summary>
/// <returns>True, if end of buffer was reached.</returns> /// <returns>True, if end of buffer was reached.</returns>
public bool IsEndOfStream() => this.Eos || ((this.pos == this.len) && (this.bitPos > Lbits)); [MethodImpl(InliningOptions.ShortMethod)]
public bool IsEndOfStream() => this.Eos || (this.pos == this.len && this.bitPos > Lbits);
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private void DoFillBitWindow() => this.ShiftBytes(); private void DoFillBitWindow() => this.ShiftBytes();
@ -189,6 +191,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// <summary> /// <summary>
/// If not at EOS, reload up to Vp8LLbits byte-by-byte. /// If not at EOS, reload up to Vp8LLbits byte-by-byte.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private void ShiftBytes() private void ShiftBytes()
{ {
System.Span<byte> dataSpan = this.Data.Memory.Span; System.Span<byte> dataSpan = this.Data.Memory.Span;
@ -199,17 +202,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
++this.pos; ++this.pos;
this.bitPos -= 8; this.bitPos -= 8;
} }
if (this.IsEndOfStream())
{
this.SetEndOfStream();
}
}
private void SetEndOfStream()
{
this.Eos = true;
this.bitPos = 0; // To avoid undefined behaviour with shifts.
} }
} }
} }

67
src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs

@ -10,11 +10,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
{ {
internal abstract class BitWriterBase internal abstract class BitWriterBase
{ {
private const uint MaxDimension = 16777215;
private const ulong MaxCanvasPixels = 4294967295ul;
protected const uint ExtendedFileChunkSize = WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize;
/// <summary> /// <summary>
/// Buffer to write to. /// Buffer to write to.
/// </summary> /// </summary>
private byte[] buffer; private byte[] buffer;
/// <summary>
/// A scratch buffer to reduce allocations.
/// </summary>
private readonly byte[] scratchBuffer = new byte[4];
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="BitWriterBase"/> class. /// Initializes a new instance of the <see cref="BitWriterBase"/> class.
/// </summary> /// </summary>
@ -52,15 +63,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
/// </summary> /// </summary>
public abstract void Finish(); public abstract void Finish();
/// <summary>
/// Writes the encoded image to the stream.
/// </summary>
/// <param name="stream">The stream to write to.</param>
/// <param name="exifProfile">The exif profile.</param>
/// <param name="width">The width of the image.</param>
/// <param name="height">The height of the image.</param>
public abstract void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height);
protected void ResizeBuffer(int maxBytes, int sizeRequired) protected void ResizeBuffer(int maxBytes, int sizeRequired)
{ {
int newSize = (3 * maxBytes) >> 1; int newSize = (3 * maxBytes) >> 1;
@ -81,13 +83,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
/// <param name="riffSize">The block length.</param> /// <param name="riffSize">The block length.</param>
protected void WriteRiffHeader(Stream stream, uint riffSize) protected void WriteRiffHeader(Stream stream, uint riffSize)
{ {
Span<byte> buf = stackalloc byte[4];
stream.Write(WebpConstants.RiffFourCc); stream.Write(WebpConstants.RiffFourCc);
BinaryPrimitives.WriteUInt32LittleEndian(buf, riffSize); BinaryPrimitives.WriteUInt32LittleEndian(this.scratchBuffer, riffSize);
stream.Write(buf); stream.Write(this.scratchBuffer.AsSpan(0, 4));
stream.Write(WebpConstants.WebpHeader); stream.Write(WebpConstants.WebpHeader);
} }
/// <summary>
/// Computes how many bytes the exif chunk occupies: the chunk header, the payload,
/// and one extra padding byte when the payload length is odd.
/// </summary>
/// <param name="exifBytes">The exif profile bytes.</param>
/// <returns>The exif chunk size in bytes.</returns>
protected uint ExifChunkSize(byte[] exifBytes)
{
    uint payloadSize = (uint)exifBytes.Length;

    // The lowest bit is set exactly when the payload length is odd.
    uint padding = payloadSize & 1;
    return WebpConstants.ChunkHeaderSize + payloadSize + padding;
}
/// <summary> /// <summary>
/// Writes the Exif profile to the stream. /// Writes the Exif profile to the stream.
/// </summary> /// </summary>
@ -97,12 +111,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
{ {
DebugGuard.NotNull(exifBytes, nameof(exifBytes)); DebugGuard.NotNull(exifBytes, nameof(exifBytes));
Span<byte> buf = stackalloc byte[4]; uint size = (uint)exifBytes.Length;
Span<byte> buf = this.scratchBuffer.AsSpan(0, 4);
BinaryPrimitives.WriteUInt32BigEndian(buf, (uint)WebpChunkType.Exif); BinaryPrimitives.WriteUInt32BigEndian(buf, (uint)WebpChunkType.Exif);
stream.Write(buf); stream.Write(buf);
BinaryPrimitives.WriteUInt32LittleEndian(buf, (uint)exifBytes.Length); BinaryPrimitives.WriteUInt32LittleEndian(buf, size);
stream.Write(buf); stream.Write(buf);
stream.Write(exifBytes); stream.Write(exifBytes);
// Add padding byte if needed.
if ((size & 1) == 1)
{
stream.WriteByte(0);
}
} }
/// <summary> /// <summary>
@ -112,16 +133,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
/// <param name="exifProfile">A exif profile or null, if it does not exist.</param> /// <param name="exifProfile">A exif profile or null, if it does not exist.</param>
/// <param name="width">The width of the image.</param> /// <param name="width">The width of the image.</param>
/// <param name="height">The height of the image.</param> /// <param name="height">The height of the image.</param>
protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint width, uint height) /// <param name="hasAlpha">Flag indicating, if a alpha channel is present.</param>
protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha)
{ {
int maxDimension = 16777215; if (width > MaxDimension || height > MaxDimension)
if (width > maxDimension || height > maxDimension)
{ {
WebpThrowHelper.ThrowInvalidImageDimensions($"Image width or height exceeds maximum allowed dimension of {maxDimension}"); WebpThrowHelper.ThrowInvalidImageDimensions($"Image width or height exceeds maximum allowed dimension of {MaxDimension}");
} }
// The spec states that the product of Canvas Width and Canvas Height MUST be at most 2^32 - 1. // The spec states that the product of Canvas Width and Canvas Height MUST be at most 2^32 - 1.
if (width * height > 4294967295ul) if (width * height > MaxCanvasPixels)
{ {
WebpThrowHelper.ThrowInvalidImageDimensions("The product of image width and height MUST be at most 2^32 - 1"); WebpThrowHelper.ThrowInvalidImageDimensions("The product of image width and height MUST be at most 2^32 - 1");
} }
@ -133,7 +154,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
flags |= 8; flags |= 8;
} }
Span<byte> buf = stackalloc byte[4]; if (hasAlpha)
{
// Set alpha bit.
flags |= 16;
}
Span<byte> buf = this.scratchBuffer.AsSpan(0, 4);
stream.Write(WebpConstants.Vp8XMagicBytes); stream.Write(WebpConstants.Vp8XMagicBytes);
BinaryPrimitives.WriteUInt32LittleEndian(buf, WebpConstants.Vp8XChunkSize); BinaryPrimitives.WriteUInt32LittleEndian(buf, WebpConstants.Vp8XChunkSize);
stream.Write(buf); stream.Write(buf);

21
src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs

@ -399,8 +399,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
} }
} }
/// <inheritdoc/> /// <summary>
public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height) /// Writes the encoded image to the stream.
/// </summary>
/// <param name="stream">The stream to write to.</param>
/// <param name="exifProfile">The exif profile.</param>
/// <param name="width">The width of the image.</param>
/// <param name="height">The height of the image.</param>
/// <param name="hasAlpha">Flag indicating whether an alpha channel is present.</param>
public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha)
{ {
bool isVp8X = false; bool isVp8X = false;
byte[] exifBytes = null; byte[] exifBytes = null;
@ -408,9 +415,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
if (exifProfile != null) if (exifProfile != null)
{ {
isVp8X = true; isVp8X = true;
riffSize += WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize; riffSize += ExtendedFileChunkSize;
exifBytes = exifProfile.ToByteArray(); exifBytes = exifProfile.ToByteArray();
riffSize += WebpConstants.ChunkHeaderSize + (uint)exifBytes.Length; riffSize += this.ExifChunkSize(exifBytes);
} }
this.Finish(); this.Finish();
@ -433,7 +440,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
riffSize += WebpConstants.TagSize + WebpConstants.ChunkHeaderSize + vp8Size; riffSize += WebpConstants.TagSize + WebpConstants.ChunkHeaderSize + vp8Size;
// Emit headers and partition #0 // Emit headers and partition #0
this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile); this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile, hasAlpha);
bitWriterPartZero.WriteToStream(stream); bitWriterPartZero.WriteToStream(stream);
// Write the encoded image to the stream. // Write the encoded image to the stream.
@ -616,14 +623,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
while (it.Next()); while (it.Next());
} }
private void WriteWebpHeaders(Stream stream, uint size0, uint vp8Size, uint riffSize, bool isVp8X, uint width, uint height, ExifProfile exifProfile) private void WriteWebpHeaders(Stream stream, uint size0, uint vp8Size, uint riffSize, bool isVp8X, uint width, uint height, ExifProfile exifProfile, bool hasAlpha)
{ {
this.WriteRiffHeader(stream, riffSize); this.WriteRiffHeader(stream, riffSize);
// Write VP8X, header if necessary. // Write VP8X, header if necessary.
if (isVp8X) if (isVp8X)
{ {
this.WriteVp8XHeader(stream, exifProfile, width, height); this.WriteVp8XHeader(stream, exifProfile, width, height, hasAlpha);
} }
this.WriteVp8Header(stream, vp8Size); this.WriteVp8Header(stream, vp8Size);

22
src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs

@ -127,19 +127,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
this.used = 0; this.used = 0;
} }
/// <inheritdoc/> /// <summary>
public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height) /// Writes the encoded image to the stream.
/// </summary>
/// <param name="stream">The stream to write to.</param>
/// <param name="exifProfile">The exif profile.</param>
/// <param name="width">The width of the image.</param>
/// <param name="height">The height of the image.</param>
/// <param name="hasAlpha">Flag indicating whether an alpha channel is present.</param>
public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha)
{ {
Span<byte> buffer = stackalloc byte[4];
bool isVp8X = false; bool isVp8X = false;
byte[] exifBytes = null; byte[] exifBytes = null;
uint riffSize = 0; uint riffSize = 0;
if (exifProfile != null) if (exifProfile != null)
{ {
isVp8X = true; isVp8X = true;
riffSize += WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize; riffSize += ExtendedFileChunkSize;
exifBytes = exifProfile.ToByteArray(); exifBytes = exifProfile.ToByteArray();
riffSize += WebpConstants.ChunkHeaderSize + (uint)exifBytes.Length; riffSize += this.ExifChunkSize(exifBytes);
} }
this.Finish(); this.Finish();
@ -154,15 +160,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
// Write VP8X, header if necessary. // Write VP8X, header if necessary.
if (isVp8X) if (isVp8X)
{ {
this.WriteVp8XHeader(stream, exifProfile, width, height); this.WriteVp8XHeader(stream, exifProfile, width, height, hasAlpha);
} }
// Write magic bytes indicating its a lossless webp. // Write magic bytes indicating its a lossless webp.
stream.Write(WebpConstants.Vp8LMagicBytes); stream.Write(WebpConstants.Vp8LMagicBytes);
// Write Vp8 Header. // Write Vp8 Header.
BinaryPrimitives.WriteUInt32LittleEndian(buffer, size); BinaryPrimitives.WriteUInt32LittleEndian(this.scratchBuffer, size);
stream.Write(buffer); stream.Write(this.scratchBuffer.AsSpan(0, 4));
stream.WriteByte(WebpConstants.Vp8LHeaderMagicByte); stream.WriteByte(WebpConstants.Vp8LHeaderMagicByte);
// Write the encoded bytes of the image to the stream. // Write the encoded bytes of the image to the stream.

1
src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs

@ -35,6 +35,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
/// <summary> /// <summary>
/// Gets the number of entropy-analysis passes (in [1..10]). /// Gets the number of entropy-analysis passes (in [1..10]).
/// Defaults to 1.
/// </summary> /// </summary>
int EntropyPasses { get; } int EntropyPasses { get; }

10
src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs

@ -49,6 +49,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
double bitCostBest = -1; double bitCostBest = -1;
int cacheBitsInitial = cacheBits; int cacheBitsInitial = cacheBits;
Vp8LHashChain hashChainBox = null; Vp8LHashChain hashChainBox = null;
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
for (int lz77Type = 1; lz77TypesToTry > 0; lz77TypesToTry &= ~lz77Type, lz77Type <<= 1) for (int lz77Type = 1; lz77TypesToTry > 0; lz77TypesToTry &= ~lz77Type, lz77Type <<= 1)
{ {
int cacheBitsTmp = cacheBitsInitial; int cacheBitsTmp = cacheBitsInitial;
@ -81,7 +83,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Keep the best backward references. // Keep the best backward references.
var histo = new Vp8LHistogram(worst, cacheBitsTmp); var histo = new Vp8LHistogram(worst, cacheBitsTmp);
double bitCost = histo.EstimateBits(); double bitCost = histo.EstimateBits(stats, bitsEntropy);
if (lz77TypeBest == 0 || bitCost < bitCostBest) if (lz77TypeBest == 0 || bitCost < bitCostBest)
{ {
@ -100,7 +102,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox; Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox;
BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst); BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst);
var histo = new Vp8LHistogram(worst, cacheBits); var histo = new Vp8LHistogram(worst, cacheBits);
double bitCostTrace = histo.EstimateBits(); double bitCostTrace = histo.EstimateBits(stats, bitsEntropy);
if (bitCostTrace < bitCostBest) if (bitCostTrace < bitCostBest)
{ {
best = worst; best = worst;
@ -214,9 +216,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
for (int i = 0; i <= cacheBitsMax; i++) for (int i = 0; i <= cacheBitsMax; i++)
{ {
double entropy = histos[i].EstimateBits(); double entropy = histos[i].EstimateBits(stats, bitsEntropy);
if (i == 0 || entropy < entropyMin) if (i == 0 || entropy < entropyMin)
{ {
entropyMin = entropy; entropyMin = entropy;

8
src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs

@ -1,6 +1,8 @@
// Copyright (c) Six Labors. // Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
/// <summary> /// <summary>
@ -41,6 +43,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Inserts a new color into the cache. /// Inserts a new color into the cache.
/// </summary> /// </summary>
/// <param name="bgra">The color to insert.</param> /// <param name="bgra">The color to insert.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void Insert(uint bgra) public void Insert(uint bgra)
{ {
int key = HashPix(bgra, this.HashShift); int key = HashPix(bgra, this.HashShift);
@ -52,6 +55,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
/// <param name="key">The key to lookup.</param> /// <param name="key">The key to lookup.</param>
/// <returns>The color for the key.</returns> /// <returns>The color for the key.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public uint Lookup(int key) => this.Colors[key]; public uint Lookup(int key) => this.Colors[key];
/// <summary> /// <summary>
@ -59,6 +63,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
/// <param name="bgra">The color to check.</param> /// <param name="bgra">The color to check.</param>
/// <returns>The index of the color in the cache or -1 if its not present.</returns> /// <returns>The index of the color in the cache or -1 if its not present.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public int Contains(uint bgra) public int Contains(uint bgra)
{ {
int key = HashPix(bgra, this.HashShift); int key = HashPix(bgra, this.HashShift);
@ -70,6 +75,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
/// <param name="bgra">The color.</param> /// <param name="bgra">The color.</param>
/// <returns>The index for the color.</returns> /// <returns>The index for the color.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift); public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift);
/// <summary> /// <summary>
@ -77,8 +83,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
/// <param name="key">The key.</param> /// <param name="key">The key.</param>
/// <param name="bgra">The color to add.</param> /// <param name="bgra">The color to add.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void Set(uint key, uint bgra) => this.Colors[key] = bgra; public void Set(uint key, uint bgra) => this.Colors[key] = bgra;
[MethodImpl(InliningOptions.ShortMethod)]
public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift); public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift);
} }
} }

268
src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs

@ -0,0 +1,268 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
internal static class ColorSpaceTransformUtils
{
#if SUPPORTS_RUNTIME_INTRINSICS
// Masks used by CollectColorRedTransforms.
// Keeps bits 8..15 (the green byte) of every 32-bit pixel.
private static readonly Vector128<byte> CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte();
// Keeps the low byte of every 16-bit element.
private static readonly Vector128<byte> CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte();
// 256-bit counterparts of the two masks above.
private static readonly Vector256<byte> CollectColorRedTransformsGreenMask256 = Vector256.Create(0x00ff00).AsByte();
private static readonly Vector256<byte> CollectColorRedTransformsAndMask256 = Vector256.Create((short)0xff).AsByte();
// Masks used by CollectColorBlueTransforms.
// Keeps the high byte of every 16-bit element (applied to the packed green/blue pairs to isolate green).
private static readonly Vector128<byte> CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
// Keeps the two low bytes of every 32-bit pixel.
private static readonly Vector128<byte> CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
// Keeps the low byte of every 16-bit element.
private static readonly Vector128<byte> CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
// Shuffle control: copies source bytes 2, 6, 10, 14 into the odd bytes of the low 8 bytes; entries of 255 yield zero.
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
// Shuffle control: copies source bytes 2, 6, 10, 14 into the odd bytes of the high 8 bytes; entries of 255 yield zero.
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
// 256-bit counterparts of the blue-transform masks above.
private static readonly Vector256<byte> CollectColorBlueTransformsShuffleLowMask256 = Vector256.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30, 255, 255, 255, 255, 255, 255, 255, 255);
private static readonly Vector256<byte> CollectColorBlueTransformsShuffleHighMask256 = Vector256.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30);
private static readonly Vector256<byte> CollectColorBlueTransformsGreenBlueMask256 = Vector256.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
private static readonly Vector256<byte> CollectColorBlueTransformsBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector256<byte> CollectColorBlueTransformsGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
#endif
/// <summary>
/// Counts, for every pixel of a tile, the transformed blue value in the histogram.
/// An AVX2 path is used when supported and the tile is at least 16 pixels wide, otherwise an SSE4.1 path
/// when supported, otherwise the scalar routine based on LosslessUtils.TransformColorBlue.
/// </summary>
/// <param name="bgra">The pixel data; index 0 is the first pixel of the first tile row.</param>
/// <param name="stride">The offset, in pixels, between the starts of two consecutive tile rows.</param>
/// <param name="tileWidth">The number of pixels processed per row.</param>
/// <param name="tileHeight">The number of rows processed.</param>
/// <param name="greenToBlue">The green-to-blue multiplier.</param>
/// <param name="redToBlue">The red-to-blue multiplier.</param>
/// <param name="histo">The histogram to update; the entry indexed by each transformed value is incremented.</param>
// NOTE(review): the vector paths appear to produce the same values as LosslessUtils.TransformColorBlue - confirm against that helper.
public static void CollectColorBlueTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && tileWidth >= 16)
{
// Each iteration handles 16 pixels: two 256-bit loads of 8 pixels each.
const int span = 16;
// Scratch storage for the 16-bit results of one iteration.
Span<ushort> values = stackalloc ushort[span];
var multsr = Vector256.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToBlue));
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector256<byte> input0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector256<byte> input1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
// Gather the red byte (byte 2) of every pixel into the high byte of a 16-bit element.
Vector256<byte> r0 = Avx2.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask256);
Vector256<byte> r1 = Avx2.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask256);
Vector256<byte> r = Avx2.Or(r0, r1);
// Keep the two low bytes (blue, green) of every pixel and pack them into 16-bit elements.
Vector256<byte> gb0 = Avx2.And(input0, CollectColorBlueTransformsGreenBlueMask256);
Vector256<byte> gb1 = Avx2.And(input1, CollectColorBlueTransformsGreenBlueMask256);
Vector256<ushort> gb = Avx2.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
// Isolate green in the high byte of each 16-bit element.
Vector256<byte> g = Avx2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask256);
// MultiplyHigh keeps the upper 16 bits of each signed 16-bit product.
Vector256<short> a = Avx2.MultiplyHigh(r.AsInt16(), multsr);
Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg);
// Subtract both deltas byte-wise; the final mask keeps only the low (blue) byte of each element.
Vector256<byte> c = Avx2.Subtract(gb.AsByte(), b.AsByte());
Vector256<byte> d = Avx2.Subtract(c, a.AsByte());
Vector256<byte> e = Avx2.And(d, CollectColorBlueTransformsBlueMask256);
// Spill the vector so the histogram can be updated one element at a time.
// The order of the elements is irrelevant because only counts are accumulated.
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector256<ushort>>(ref outputRef) = e.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
// Columns beyond the last full vector step are handled by the scalar routine.
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
}
}
else if (Sse41.IsSupported)
{
// Same algorithm as the AVX2 path with 128-bit vectors: 8 pixels per iteration.
// For tiles narrower than 8 pixels the vector loop does not execute and every column is a leftover.
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector128<byte> input0 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector128<byte> input1 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
// Red bytes of the first four pixels go to the low half, those of the next four to the high half.
Vector128<byte> r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
Vector128<byte> r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
Vector128<byte> r = Sse2.Or(r0, r1);
Vector128<byte> gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
Vector128<byte> gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector128<byte> g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask);
Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128<byte> d = Sse2.Subtract(c, a.AsByte());
Vector128<byte> e = Sse2.And(d, CollectColorBlueTransformsBlueMask);
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector128<ushort>>(ref outputRef) = e.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
// Columns beyond the last full vector step are handled by the scalar routine.
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
}
}
else
#endif
{
// No usable SIMD support (or intrinsics compiled out): process the whole tile with the scalar routine.
CollectColorBlueTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
}
}
/// <summary>
/// Scalar implementation: visits the tile row by row and counts the value returned by
/// LosslessUtils.TransformColorBlue for each pixel in the histogram.
/// </summary>
/// <param name="bgra">The pixel data; index 0 is the first pixel of the first row to process.</param>
/// <param name="stride">The offset, in pixels, between the starts of two consecutive rows.</param>
/// <param name="tileWidth">The number of pixels processed per row.</param>
/// <param name="tileHeight">The number of rows processed.</param>
/// <param name="greenToBlue">The green-to-blue multiplier.</param>
/// <param name="redToBlue">The red-to-blue multiplier.</param>
/// <param name="histo">The histogram to update.</param>
private static void CollectColorBlueTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{
    int rowStart = 0;
    for (int row = 0; row < tileHeight; row++)
    {
        for (int column = 0; column < tileWidth; column++)
        {
            uint pixel = bgra[rowStart + column];
            int bin = LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, pixel);
            histo[bin]++;
        }

        // Advance to the first pixel of the next row.
        rowStart += stride;
    }
}
/// <summary>
/// Counts, for every pixel of a tile, the transformed red value in the histogram.
/// An AVX2 path is used when supported and the tile is at least 16 pixels wide, otherwise an SSE4.1 path
/// when supported, otherwise the scalar routine based on LosslessUtils.TransformColorRed.
/// </summary>
/// <param name="bgra">The pixel data; index 0 is the first pixel of the first tile row.</param>
/// <param name="stride">The offset, in pixels, between the starts of two consecutive tile rows.</param>
/// <param name="tileWidth">The number of pixels processed per row.</param>
/// <param name="tileHeight">The number of rows processed.</param>
/// <param name="greenToRed">The green-to-red multiplier.</param>
/// <param name="histo">The histogram to update; the entry indexed by each transformed value is incremented.</param>
// NOTE(review): the vector paths appear to produce the same values as LosslessUtils.TransformColorRed - confirm against that helper.
public static void CollectColorRedTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && tileWidth >= 16)
{
var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToRed));
// Each iteration handles 16 pixels: two 256-bit loads of 8 pixels each.
const int span = 16;
// Scratch storage for the 16-bit results of one iteration.
Span<ushort> values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector256<byte> input0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector256<byte> input1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
// The trailing comments show the byte layout of one element, most significant byte first.
Vector256<byte> g0 = Avx2.And(input0, CollectColorRedTransformsGreenMask256); // 0 0 | g 0
Vector256<byte> g1 = Avx2.And(input1, CollectColorRedTransformsGreenMask256);
Vector256<ushort> g = Avx2.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector256<int> a0 = Avx2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector256<int> a1 = Avx2.ShiftRightLogical(input1.AsInt32(), 16);
Vector256<ushort> a = Avx2.PackUnsignedSaturate(a0, a1); // x r
// MultiplyHigh keeps the upper 16 bits of each signed 16-bit product.
Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector256<byte> c = Avx2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector256<byte> d = Avx2.And(c, CollectColorRedTransformsAndMask256); // 0 r'
// Spill the vector so the histogram can be updated one element at a time.
// The order of the elements is irrelevant because only counts are accumulated.
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector256<ushort>>(ref outputRef) = d.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
// Columns beyond the last full vector step are handled by the scalar routine.
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
}
}
else if (Sse41.IsSupported)
{
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
// Same algorithm as the AVX2 path with 128-bit vectors: 8 pixels per iteration.
// For tiles narrower than 8 pixels the vector loop does not execute and every column is a leftover.
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector128<byte> input0 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector128<byte> input1 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector128<byte> g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
Vector128<byte> g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector128<byte> d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector128<ushort>>(ref outputRef) = d.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
// Columns beyond the last full vector step are handled by the scalar routine.
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
}
}
else
#endif
{
// No usable SIMD support (or intrinsics compiled out): process the whole tile with the scalar routine.
CollectColorRedTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToRed, histo);
}
}
/// <summary>
/// Scalar implementation: visits the tile row by row and counts the value returned by
/// LosslessUtils.TransformColorRed for each pixel in the histogram.
/// </summary>
/// <param name="bgra">The pixel data; index 0 is the first pixel of the first row to process.</param>
/// <param name="stride">The offset, in pixels, between the starts of two consecutive rows.</param>
/// <param name="tileWidth">The number of pixels processed per row.</param>
/// <param name="tileHeight">The number of rows processed.</param>
/// <param name="greenToRed">The green-to-red multiplier.</param>
/// <param name="histo">The histogram to update.</param>
private static void CollectColorRedTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{
    int rowStart = 0;
    for (int row = 0; row < tileHeight; row++)
    {
        for (int column = 0; column < tileWidth; column++)
        {
            uint pixel = bgra[rowStart + column];
            int bin = LosslessUtils.TransformColorRed((sbyte)greenToRed, pixel);
            histo[bin]++;
        }

        // Advance to the first pixel of the next row.
        rowStart += stride;
    }
}
}
}

2
src/ImageSharp/Formats/Webp/Lossless/CostModel.cs

@ -87,7 +87,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (nonzeros <= 1) if (nonzeros <= 1)
{ {
output.AsSpan(0, numSymbols).Fill(0); output.AsSpan(0, numSymbols).Clear();
} }
else else
{ {

43
src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs

@ -152,10 +152,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static int HistogramCopyAndAnalyze(List<Vp8LHistogram> origHistograms, List<Vp8LHistogram> histograms, ushort[] histogramSymbols) private static int HistogramCopyAndAnalyze(List<Vp8LHistogram> origHistograms, List<Vp8LHistogram> histograms, ushort[] histogramSymbols)
{ {
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
for (int clusterId = 0, i = 0; i < origHistograms.Count; i++) for (int clusterId = 0, i = 0; i < origHistograms.Count; i++)
{ {
Vp8LHistogram origHistogram = origHistograms[i]; Vp8LHistogram origHistogram = origHistograms[i];
origHistogram.UpdateHistogramCost(); origHistogram.UpdateHistogramCost(stats, bitsEntropy);
// Skip the histogram if it is completely empty, which can happen for tiles with no information (when they are skipped because of LZ77). // Skip the histogram if it is completely empty, which can happen for tiles with no information (when they are skipped because of LZ77).
if (!origHistogram.IsUsed[0] && !origHistogram.IsUsed[1] && !origHistogram.IsUsed[2] && !origHistogram.IsUsed[3] && !origHistogram.IsUsed[4]) if (!origHistogram.IsUsed[0] && !origHistogram.IsUsed[1] && !origHistogram.IsUsed[2] && !origHistogram.IsUsed[3] && !origHistogram.IsUsed[4])
@ -175,7 +177,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return numUsed; return numUsed;
} }
private static void HistogramCombineEntropyBin(List<Vp8LHistogram> histograms, ushort[] clusters, ushort[] clusterMappings, Vp8LHistogram curCombo, ushort[] binMap, int numBins, double combineCostFactor) private static void HistogramCombineEntropyBin(
List<Vp8LHistogram> histograms,
ushort[] clusters,
ushort[] clusterMappings,
Vp8LHistogram curCombo,
ushort[] binMap,
int numBins,
double combineCostFactor)
{ {
var binInfo = new HistogramBinInfo[BinSize]; var binInfo = new HistogramBinInfo[BinSize];
for (int idx = 0; idx < numBins; idx++) for (int idx = 0; idx < numBins; idx++)
@ -191,6 +200,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
var indicesToRemove = new List<int>(); var indicesToRemove = new List<int>();
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
for (int idx = 0; idx < histograms.Count; idx++) for (int idx = 0; idx < histograms.Count; idx++)
{ {
if (histograms[idx] == null) if (histograms[idx] == null)
@ -209,7 +220,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Try to merge #idx into #first (both share the same binId) // Try to merge #idx into #first (both share the same binId)
double bitCost = histograms[idx].BitCost; double bitCost = histograms[idx].BitCost;
double bitCostThresh = -bitCost * combineCostFactor; double bitCostThresh = -bitCost * combineCostFactor;
double currCostDiff = histograms[first].AddEval(histograms[idx], bitCostThresh, curCombo); double currCostDiff = histograms[first].AddEval(histograms[idx], stats, bitsEntropy, bitCostThresh, curCombo);
if (currCostDiff < bitCostThresh) if (currCostDiff < bitCostThresh)
{ {
@ -276,7 +287,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Create a mapping from a cluster id to its minimal version. // Create a mapping from a cluster id to its minimal version.
int clusterMax = 0; int clusterMax = 0;
clusterMappingsTmp.AsSpan().Fill(0); clusterMappingsTmp.AsSpan().Clear();
// Re-map the ids. // Re-map the ids.
for (int i = 0; i < symbols.Length; i++) for (int i = 0; i < symbols.Length; i++)
@ -308,6 +319,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int numUsed = histograms.Count(h => h != null); int numUsed = histograms.Count(h => h != null);
int outerIters = numUsed; int outerIters = numUsed;
int numTriesNoSuccess = outerIters / 2; int numTriesNoSuccess = outerIters / 2;
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
if (numUsed < minClusterSize) if (numUsed < minClusterSize)
{ {
@ -354,7 +367,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
idx2 = mappings[idx2]; idx2 = mappings[idx2];
// Calculate cost reduction on combination. // Calculate cost reduction on combination.
double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost); double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost, stats, bitsEntropy);
// Found a better pair? // Found a better pair?
if (currCost < 0) if (currCost < 0)
@ -428,7 +441,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (doEval) if (doEval)
{ {
// Re-evaluate the cost of an updated pair. // Re-evaluate the cost of an updated pair.
HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], 0.0d, p); HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], stats, bitsEntropy, 0.0d, p);
if (p.CostDiff >= 0.0d) if (p.CostDiff >= 0.0d)
{ {
histoPriorityList[j] = histoPriorityList[histoPriorityList.Count - 1]; histoPriorityList[j] = histoPriorityList[histoPriorityList.Count - 1];
@ -456,6 +469,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Priority list of histogram pairs. // Priority list of histogram pairs.
var histoPriorityList = new List<HistogramPair>(); var histoPriorityList = new List<HistogramPair>();
int maxSize = histoSize * histoSize; int maxSize = histoSize * histoSize;
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
for (int i = 0; i < histoSize; i++) for (int i = 0; i < histoSize; i++)
{ {
@ -471,7 +486,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
continue; continue;
} }
HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d); HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d, stats, bitsEntropy);
} }
} }
@ -510,7 +525,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
continue; continue;
} }
HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d); HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d, stats, bitsEntropy);
} }
} }
} }
@ -519,6 +534,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
int inSize = input.Count; int inSize = input.Count;
int outSize = output.Count; int outSize = output.Count;
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
if (outSize > 1) if (outSize > 1)
{ {
for (int i = 0; i < inSize; i++) for (int i = 0; i < inSize; i++)
@ -534,7 +551,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
double bestBits = double.MaxValue; double bestBits = double.MaxValue;
for (int k = 0; k < outSize; k++) for (int k = 0; k < outSize; k++)
{ {
double curBits = output[k].AddThresh(input[i], bestBits); double curBits = output[k].AddThresh(input[i], stats, bitsEntropy, bestBits);
if (k == 0 || curBits < bestBits) if (k == 0 || curBits < bestBits)
{ {
bestBits = curBits; bestBits = curBits;
@ -577,7 +594,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Create a pair from indices "idx1" and "idx2" provided its cost is inferior to "threshold", a negative entropy. /// Create a pair from indices "idx1" and "idx2" provided its cost is inferior to "threshold", a negative entropy.
/// </summary> /// </summary>
/// <returns>The cost of the pair, or 0 if it is superior to the threshold.</returns> /// <returns>The cost of the pair, or 0 if it is superior to the threshold.</returns>
private static double HistoPriorityListPush(List<HistogramPair> histoList, int maxSize, List<Vp8LHistogram> histograms, int idx1, int idx2, double threshold) private static double HistoPriorityListPush(List<HistogramPair> histoList, int maxSize, List<Vp8LHistogram> histograms, int idx1, int idx2, double threshold, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{ {
var pair = new HistogramPair(); var pair = new HistogramPair();
@ -598,7 +615,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Vp8LHistogram h1 = histograms[idx1]; Vp8LHistogram h1 = histograms[idx1];
Vp8LHistogram h2 = histograms[idx2]; Vp8LHistogram h2 = histograms[idx2];
HistoListUpdatePair(h1, h2, threshold, pair); HistoListUpdatePair(h1, h2, stats, bitsEntropy, threshold, pair);
// Do not even consider the pair if it does not improve the entropy. // Do not even consider the pair if it does not improve the entropy.
if (pair.CostDiff >= threshold) if (pair.CostDiff >= threshold)
@ -616,11 +633,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// <summary> /// <summary>
/// Update the cost diff and combo of a pair of histograms. This needs to be called when the histograms have been merged with a third one. /// Update the cost diff and combo of a pair of histograms. This needs to be called when the histograms have been merged with a third one.
/// </summary> /// </summary>
private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, double threshold, HistogramPair pair) private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double threshold, HistogramPair pair)
{ {
double sumCost = h1.BitCost + h2.BitCost; double sumCost = h1.BitCost + h2.BitCost;
pair.CostCombo = 0.0d; pair.CostCombo = 0.0d;
h1.GetCombinedHistogramEntropy(h2, sumCost + threshold, costInitial: pair.CostCombo, out double cost); h1.GetCombinedHistogramEntropy(h2, stats, bitsEntropy, sumCost + threshold, costInitial: pair.CostCombo, out double cost);
pair.CostCombo = cost; pair.CostCombo = cost;
pair.CostDiff = pair.CostCombo - sumCost; pair.CostDiff = pair.CostCombo - sumCost;
} }

9
src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs

@ -49,14 +49,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
return -1; return -1;
} }
else if (t1.TotalCount < t2.TotalCount)
if (t1.TotalCount < t2.TotalCount)
{ {
return 1; return 1;
} }
else
{ return t1.Value < t2.Value ? -1 : 1;
return t1.Value < t2.Value ? -1 : 1;
}
} }
public IDeepCloneable DeepClone() => new HuffmanTree(this); public IDeepCloneable DeepClone() => new HuffmanTree(this);

9
src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs

@ -28,7 +28,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static void CreateHuffmanTree(uint[] histogram, int treeDepthLimit, bool[] bufRle, HuffmanTree[] huffTree, HuffmanTreeCode huffCode) public static void CreateHuffmanTree(uint[] histogram, int treeDepthLimit, bool[] bufRle, HuffmanTree[] huffTree, HuffmanTreeCode huffCode)
{ {
int numSymbols = huffCode.NumSymbols; int numSymbols = huffCode.NumSymbols;
bufRle.AsSpan().Fill(false); bufRle.AsSpan().Clear();
OptimizeHuffmanForRle(numSymbols, bufRle, histogram); OptimizeHuffmanForRle(numSymbols, bufRle, histogram);
GenerateOptimalTree(huffTree, histogram, numSymbols, treeDepthLimit, huffCode.CodeLengths); GenerateOptimalTree(huffTree, histogram, numSymbols, treeDepthLimit, huffCode.CodeLengths);
@ -202,9 +202,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
// Build the Huffman tree. // Build the Huffman tree.
HuffmanTree[] treeCopy = tree.AsSpan().Slice(0, treeSize).ToArray(); #if NET5_0_OR_GREATER
Span<HuffmanTree> treeSlice = tree.AsSpan(0, treeSize);
treeSlice.Sort(HuffmanTree.Compare);
#else
HuffmanTree[] treeCopy = tree.AsSpan(0, treeSize).ToArray();
Array.Sort(treeCopy, HuffmanTree.Compare); Array.Sort(treeCopy, HuffmanTree.Compare);
treeCopy.AsSpan().CopyTo(tree); treeCopy.AsSpan().CopyTo(tree);
#endif
if (treeSize > 1) if (treeSize > 1)
{ {

510
src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs

@ -27,6 +27,36 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const double Log2Reciprocal = 1.44269504088896338700465094007086; private const double Log2Reciprocal = 1.44269504088896338700465094007086;
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector256<byte> AddGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
private static readonly Vector128<byte> AddGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
private static readonly byte AddGreenToBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
private static readonly Vector256<byte> SubtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
private static readonly Vector128<byte> SubtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
private static readonly byte SubtractGreenFromBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
private static readonly Vector128<byte> TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector256<byte> TransformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector128<byte> TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector256<byte> TransformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
private static readonly Vector128<byte> TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector256<byte> TransformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
#endif
/// <summary> /// <summary>
/// Returns the exact index where array1 and array2 are different. For an index /// Returns the exact index where array1 and array2 are different. For an index
/// inferior or equal to bestLenMatch, the return value just has to be strictly /// inferior or equal to bestLenMatch, the return value just has to be strictly
@ -97,80 +127,68 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) nint i;
for (i = 0; i <= numPixels - 8; i += 8)
{ {
int i; ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
for (i = 0; i + 8 <= numPixels; i += 8) Vector256<byte> input = Unsafe.As<uint, Vector256<uint>>(ref pos).AsByte();
{ Vector256<byte> in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2);
uint* idx = p + i; Vector256<byte> output = Avx2.Add(input, in0g0g);
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte(); Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
Vector256<byte> in0g0g = Avx2.Shuffle(input, mask); }
Vector256<byte> output = Avx2.Add(input, in0g0g);
Avx.Store((byte*)idx, output);
}
if (i != numPixels) if (i != numPixels)
{ {
AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i)); AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
} }
} }
else if (Ssse3.IsSupported) else if (Ssse3.IsSupported)
{ {
var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{ {
int i; ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
for (i = 0; i + 4 <= numPixels; i += 4) Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
{ Vector128<byte> in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3);
uint* idx = p + i; Vector128<byte> output = Sse2.Add(input, in0g0g);
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte(); Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, mask); }
Vector128<byte> output = Sse2.Add(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte());
}
if (i != numPixels) if (i != numPixels)
{ {
AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i)); AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
} }
} }
else if (Sse2.IsSupported) else if (Sse2.IsSupported)
{ {
byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{ {
int i; ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
for (i = 0; i + 4 <= numPixels; i += 4) Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
{ Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
uint* idx = p + i; Vector128<ushort> b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask);
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx); Vector128<ushort> c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte());
Vector128<ushort> b = Sse2.ShuffleLow(a, mask); Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
Vector128<ushort> c = Sse2.ShuffleHigh(b, mask); // 0g0g }
Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output);
}
if (i != numPixels) if (i != numPixels)
{ {
AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i)); AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
} }
} }
else else
#endif #endif
{ {
AddGreenToBlueAndRedNoneVectorized(pixelData); AddGreenToBlueAndRedScalar(pixelData);
} }
} }
private static void AddGreenToBlueAndRedNoneVectorized(Span<uint> pixelData) private static void AddGreenToBlueAndRedScalar(Span<uint> pixelData)
{ {
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
for (int i = 0; i < numPixels; i++) for (int i = 0; i < numPixels; i++)
@ -189,80 +207,68 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) nint i;
for (i = 0; i <= numPixels - 8; i += 8)
{ {
int i; ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
for (i = 0; i + 8 <= numPixels; i += 8) Vector256<byte> input = Unsafe.As<uint, Vector256<uint>>(ref pos).AsByte();
{ Vector256<byte> in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2);
uint* idx = p + i; Vector256<byte> output = Avx2.Subtract(input, in0g0g);
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte(); Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
Vector256<byte> in0g0g = Avx2.Shuffle(input, mask); }
Vector256<byte> output = Avx2.Subtract(input, in0g0g);
Avx.Store((byte*)idx, output);
}
if (i != numPixels) if (i != numPixels)
{ {
SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i)); SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
} }
} }
else if (Ssse3.IsSupported) else if (Ssse3.IsSupported)
{ {
var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{ {
int i; ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
for (i = 0; i + 4 <= numPixels; i += 4) Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
{ Vector128<byte> in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3);
uint* idx = p + i; Vector128<byte> output = Sse2.Subtract(input, in0g0g);
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte(); Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, mask); }
Vector128<byte> output = Sse2.Subtract(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte());
}
if (i != numPixels) if (i != numPixels)
{ {
SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i)); SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
} }
} }
else if (Sse2.IsSupported) else if (Sse2.IsSupported)
{ {
byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{ {
int i; ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
for (i = 0; i + 4 <= numPixels; i += 4) Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
{ Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
uint* idx = p + i; Vector128<ushort> b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask);
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx); Vector128<ushort> c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte());
Vector128<ushort> b = Sse2.ShuffleLow(a, mask); Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
Vector128<ushort> c = Sse2.ShuffleHigh(b, mask); // 0g0g }
Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output);
}
if (i != numPixels) if (i != numPixels)
{ {
SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i)); SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
} }
} }
else else
#endif #endif
{ {
SubtractGreenFromBlueAndRedNoneVectorized(pixelData); SubtractGreenFromBlueAndRedScalar(pixelData);
} }
} }
private static void SubtractGreenFromBlueAndRedNoneVectorized(Span<uint> pixelData) private static void SubtractGreenFromBlueAndRedScalar(Span<uint> pixelData)
{ {
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
for (int i = 0; i < numPixels; i++) for (int i = 0; i < numPixels; i++)
@ -385,52 +391,74 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Color transform keeps the green (G) value as it is, transforms red (R) based on green and transforms blue (B) based on green and then based on red. /// Color transform keeps the green (G) value as it is, transforms red (R) based on green and transforms blue (B) based on green and then based on red.
/// </summary> /// </summary>
/// <param name="m">The Vp8LMultipliers.</param> /// <param name="m">The Vp8LMultipliers.</param>
/// <param name="data">The pixel data to transform.</param> /// <param name="pixelData">The pixel data to transform.</param>
/// <param name="numPixels">The number of pixels to process.</param> /// <param name="numPixels">The number of pixels to process.</param>
public static void TransformColor(Vp8LMultipliers m, Span<uint> data, int numPixels) public static void TransformColor(Vp8LMultipliers m, Span<uint> pixelData, int numPixels)
{ {
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported) if (Avx2.IsSupported && numPixels >= 8)
{
Vector256<int> multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector256<int> multsb2 = MkCst32(Cst5b(m.RedToBlue), 0);
nint idx;
for (idx = 0; idx <= numPixels - 8; idx += 8)
{
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
Vector256<uint> input = Unsafe.As<uint, Vector256<uint>>(ref pos);
Vector256<byte> a = Avx2.And(input.AsByte(), TransformColorAlphaGreenMask256);
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector256<short> d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector256<short> e = Avx2.ShiftLeftLogical(input.AsInt16(), 8);
Vector256<short> f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector256<int> g = Avx2.ShiftRightLogical(f.AsInt32(), 16);
Vector256<byte> h = Avx2.Add(g.AsByte(), d.AsByte());
Vector256<byte> i = Avx2.And(h, TransformColorRedBlueMask256);
Vector256<byte> output = Avx2.Subtract(input.AsByte(), i);
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
}
if (idx != numPixels)
{
TransformColorScalar(m, pixelData.Slice((int)idx), numPixels - (int)idx);
}
}
else if (Sse2.IsSupported)
{ {
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); nint idx;
var maskredblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); for (idx = 0; idx <= numPixels - 4; idx += 4)
byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
fixed (uint* src = data)
{ {
int idx; ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
for (idx = 0; idx + 4 <= numPixels; idx += 4) Vector128<uint> input = Unsafe.As<uint, Vector128<uint>>(ref pos);
{ Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask);
uint* pos = src + idx; Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
Vector128<uint> input = Sse2.LoadVector128(pos); Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector128<byte> a = Sse2.And(input.AsByte(), maskalphagreen); Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128<int> g = Sse2.ShiftRightLogical(f.AsInt32(), 16);
Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); Vector128<byte> h = Sse2.Add(g.AsByte(), d.AsByte());
Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); Vector128<byte> i = Sse2.And(h, TransformColorRedBlueMask);
Vector128<int> g = Sse2.ShiftRightLogical(f.AsInt32(), 16); Vector128<byte> output = Sse2.Subtract(input.AsByte(), i);
Vector128<byte> h = Sse2.Add(g.AsByte(), d.AsByte()); Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
Vector128<byte> i = Sse2.And(h, maskredblue); }
Vector128<byte> output = Sse2.Subtract(input.AsByte(), i);
Sse2.Store((byte*)pos, output);
}
if (idx != numPixels) if (idx != numPixels)
{ {
TransformColorNoneVectorized(m, data.Slice(idx), numPixels - idx); TransformColorScalar(m, pixelData.Slice((int)idx), numPixels - (int)idx);
}
} }
} }
else else
#endif #endif
{ {
TransformColorNoneVectorized(m, data, numPixels); TransformColorScalar(m, pixelData, numPixels);
} }
} }
private static void TransformColorNoneVectorized(Vp8LMultipliers m, Span<uint> data, int numPixels) private static void TransformColorScalar(Vp8LMultipliers m, Span<uint> data, int numPixels)
{ {
for (int i = 0; i < numPixels; i++) for (int i = 0; i < numPixels; i++)
{ {
@ -456,47 +484,71 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static void TransformColorInverse(Vp8LMultipliers m, Span<uint> pixelData) public static void TransformColorInverse(Vp8LMultipliers m, Span<uint> pixelData)
{ {
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported) if (Avx2.IsSupported && pixelData.Length >= 8)
{
Vector256<int> multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector256<int> multsb2 = MkCst32(Cst5b(m.RedToBlue), 0);
nint idx;
for (idx = 0; idx <= pixelData.Length - 8; idx += 8)
{
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
Vector256<uint> input = Unsafe.As<uint, Vector256<uint>>(ref pos);
Vector256<byte> a = Avx2.And(input.AsByte(), TransformColorInverseAlphaGreenMask256);
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector256<short> d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector256<byte> e = Avx2.Add(input.AsByte(), d.AsByte());
Vector256<short> f = Avx2.ShiftLeftLogical(e.AsInt16(), 8);
Vector256<short> g = Avx2.MultiplyHigh(f, multsb2.AsInt16());
Vector256<int> h = Avx2.ShiftRightLogical(g.AsInt32(), 8);
Vector256<byte> i = Avx2.Add(h.AsByte(), f.AsByte());
Vector256<short> j = Avx2.ShiftRightLogical(i.AsInt16(), 8);
Vector256<byte> output = Avx2.Or(j.AsByte(), a);
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
}
if (idx != pixelData.Length)
{
TransformColorInverseScalar(m, pixelData.Slice((int)idx));
}
}
else if (Sse2.IsSupported)
{ {
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); nint idx;
fixed (uint* src = pixelData) for (idx = 0; idx <= pixelData.Length - 4; idx += 4)
{ {
int idx; ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
for (idx = 0; idx + 4 <= pixelData.Length; idx += 4) Vector128<uint> input = Unsafe.As<uint, Vector128<uint>>(ref pos);
{ Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask);
uint* pos = src + idx; Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
Vector128<uint> input = Sse2.LoadVector128(pos); Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector128<byte> a = Sse2.And(input.AsByte(), maskalphagreen); Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte());
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128<short> g = Sse2.MultiplyHigh(f, multsb2.AsInt16());
Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte()); Vector128<int> h = Sse2.ShiftRightLogical(g.AsInt32(), 8);
Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); Vector128<byte> i = Sse2.Add(h.AsByte(), f.AsByte());
Vector128<short> g = Sse2.MultiplyHigh(f, multsb2.AsInt16()); Vector128<short> j = Sse2.ShiftRightLogical(i.AsInt16(), 8);
Vector128<int> h = Sse2.ShiftRightLogical(g.AsInt32(), 8); Vector128<byte> output = Sse2.Or(j.AsByte(), a);
Vector128<byte> i = Sse2.Add(h.AsByte(), f.AsByte()); Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
Vector128<short> j = Sse2.ShiftRightLogical(i.AsInt16(), 8); }
Vector128<byte> output = Sse2.Or(j.AsByte(), a);
Sse2.Store((byte*)pos, output);
}
if (idx != pixelData.Length) if (idx != pixelData.Length)
{ {
TransformColorInverseNoneVectorized(m, pixelData.Slice(idx)); TransformColorInverseScalar(m, pixelData.Slice((int)idx));
}
} }
} }
else else
#endif #endif
{ {
TransformColorInverseNoneVectorized(m, pixelData); TransformColorInverseScalar(m, pixelData);
} }
} }
private static void TransformColorInverseNoneVectorized(Vp8LMultipliers m, Span<uint> pixelData) private static void TransformColorInverseScalar(Vp8LMultipliers m, Span<uint> pixelData)
{ {
for (int i = 0; i < pixelData.Length; i++) for (int i = 0; i < pixelData.Length; i++)
{ {
@ -551,6 +603,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int mask = tileWidth - 1; int mask = tileWidth - 1;
int tilesPerRow = SubSampleSize(width, transform.Bits); int tilesPerRow = SubSampleSize(width, transform.Bits);
int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow; int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow;
Span<short> scratch = stackalloc short[8];
while (y < yEnd) while (y < yEnd)
{ {
int predictorModeIdx = predictorModeIdxBase; int predictorModeIdx = predictorModeIdxBase;
@ -608,7 +661,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
PredictorAdd10(input + x, output + x - width, xEnd - x, output + x); PredictorAdd10(input + x, output + x - width, xEnd - x, output + x);
break; break;
case 11: case 11:
PredictorAdd11(input + x, output + x - width, xEnd - x, output + x); PredictorAdd11(input + x, output + x - width, xEnd - x, output + x, scratch);
break; break;
case 12: case 12:
PredictorAdd12(input + x, output + x - width, xEnd - x, output + x); PredictorAdd12(input + x, output + x - width, xEnd - x, output + x);
@ -704,7 +757,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Compute the combined Shannon's entropy for distribution {X} and {X+Y}. /// Compute the combined Shannon's entropy for distribution {X} and {X+Y}.
/// </summary> /// </summary>
/// <returns>Shannon entropy.</returns> /// <returns>Shannon entropy.</returns>
public static float CombinedShannonEntropy(int[] x, int[] y) public static float CombinedShannonEntropy(Span<int> x, Span<int> y)
{ {
double retVal = 0.0d; double retVal = 0.0d;
uint sumX = 0, sumXY = 0; uint sumX = 0, sumXY = 0;
@ -730,6 +783,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (float)retVal; return (float)retVal;
} }
[MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorRed(sbyte greenToRed, uint argb) public static byte TransformColorRed(sbyte greenToRed, uint argb)
{ {
sbyte green = U32ToS8(argb >> 8); sbyte green = U32ToS8(argb >> 8);
@ -738,6 +792,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (byte)(newRed & 0xff); return (byte)(newRed & 0xff);
} }
[MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint argb) public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint argb)
{ {
sbyte green = U32ToS8(argb >> 8); sbyte green = U32ToS8(argb >> 8);
@ -751,6 +806,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// <summary> /// <summary>
/// Fast calculation of log2(v) for integer input. /// Fast calculation of log2(v) for integer input.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v); public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v);
/// <summary> /// <summary>
@ -779,7 +835,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static float FastSLog2Slow(uint v) private static float FastSLog2Slow(uint v)
{ {
Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); DebugGuard.MustBeGreaterThanOrEqualTo<uint>(v, LogLookupIdxMax, nameof(v));
if (v < ApproxLogWithCorrectionMax) if (v < ApproxLogWithCorrectionMax)
{ {
int logCnt = 0; int logCnt = 0;
@ -803,15 +859,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int correction = (int)((23 * (origV & (y - 1))) >> 4); int correction = (int)((23 * (origV & (y - 1))) >> 4);
return (vF * (WebpLookupTables.Log2Table[v] + logCnt)) + correction; return (vF * (WebpLookupTables.Log2Table[v] + logCnt)) + correction;
} }
else
{ return (float)(Log2Reciprocal * v * Math.Log(v));
return (float)(Log2Reciprocal * v * Math.Log(v));
}
} }
private static float FastLog2Slow(uint v) private static float FastLog2Slow(uint v)
{ {
Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
if (v < ApproxLogWithCorrectionMax) if (v < ApproxLogWithCorrectionMax)
{ {
int logCnt = 0; int logCnt = 0;
@ -974,11 +1029,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output) private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output, Span<short> scratch)
{ {
for (int x = 0; x < numberOfPixels; x++) for (int x = 0; x < numberOfPixels; x++)
{ {
uint pred = Predictor11(output[x - 1], upper + x); uint pred = Predictor11(output[x - 1], upper + x, scratch);
output[x] = AddPixels(input[x], pred); output[x] = AddPixels(input[x], pred);
} }
} }
@ -1031,7 +1086,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]); public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]);
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor11(uint left, uint* top) => Select(top[0], left, top[-1]); public static uint Predictor11(uint left, uint* top, Span<short> scratch) => Select(top[0], left, top[-1], scratch);
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]); public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]);
@ -1148,11 +1203,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output) public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output, Span<short> scratch)
{ {
for (int x = 0; x < numPixels; x++) for (int x = 0; x < numPixels; x++)
{ {
uint pred = Predictor11(input[x - 1], upper + x); uint pred = Predictor11(input[x - 1], upper + x, scratch);
output[x] = SubPixels(input[x], pred); output[x] = SubPixels(input[x], pred);
} }
} }
@ -1200,30 +1255,65 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2) private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2)
{ {
int a = AddSubtractComponentFull( #if SUPPORTS_RUNTIME_INTRINSICS
(int)(c0 >> 24), if (Sse2.IsSupported)
(int)(c1 >> 24), {
(int)(c2 >> 24)); Vector128<byte> c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128<byte>.Zero);
int r = AddSubtractComponentFull( Vector128<byte> c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128<byte>.Zero);
(int)((c0 >> 16) & 0xff), Vector128<byte> c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128<byte>.Zero);
(int)((c1 >> 16) & 0xff), Vector128<short> v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16());
(int)((c2 >> 16) & 0xff)); Vector128<short> v2 = Sse2.Subtract(v1, c2Vec.AsInt16());
int g = AddSubtractComponentFull( Vector128<byte> b = Sse2.PackUnsignedSaturate(v2, v2);
(int)((c0 >> 8) & 0xff), uint output = Sse2.ConvertToUInt32(b.AsUInt32());
(int)((c1 >> 8) & 0xff), return output;
(int)((c2 >> 8) & 0xff)); }
int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff)); #endif
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; {
int a = AddSubtractComponentFull(
(int)(c0 >> 24),
(int)(c1 >> 24),
(int)(c2 >> 24));
int r = AddSubtractComponentFull(
(int)((c0 >> 16) & 0xff),
(int)((c1 >> 16) & 0xff),
(int)((c2 >> 16) & 0xff));
int g = AddSubtractComponentFull(
(int)((c0 >> 8) & 0xff),
(int)((c1 >> 8) & 0xff),
(int)((c2 >> 8) & 0xff));
int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
}
} }
private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2) private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
{ {
uint ave = Average2(c0, c1); #if SUPPORTS_RUNTIME_INTRINSICS
int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24)); if (Sse2.IsSupported)
int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff)); {
int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff)); Vector128<byte> c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128<byte>.Zero);
int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff)); Vector128<byte> c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128<byte>.Zero);
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; Vector128<byte> b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128<byte>.Zero);
Vector128<short> avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16());
Vector128<short> a0 = Sse2.ShiftRightLogical(avg, 1);
Vector128<short> a1 = Sse2.Subtract(a0, b0.AsInt16());
Vector128<short> bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16());
Vector128<short> a2 = Sse2.Subtract(a1, bgta);
Vector128<short> a3 = Sse2.ShiftRightArithmetic(a2, 1);
Vector128<short> a4 = Sse2.Add(a0, a3).AsInt16();
Vector128<byte> a5 = Sse2.PackUnsignedSaturate(a4, a4);
uint output = Sse2.ConvertToUInt32(a5.AsUInt32());
return output;
}
#endif
{
uint ave = Average2(c0, c1);
int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
}
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
@ -1238,16 +1328,46 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private static Vector128<int> MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff)); private static Vector128<int> MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff));
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector256<int> MkCst32(int hi, int lo) => Vector256.Create((hi << 16) | (lo & 0xffff));
#endif #endif
private static uint Select(uint a, uint b, uint c) private static uint Select(uint a, uint b, uint c, Span<short> scratch)
{ {
int paMinusPb = #if SUPPORTS_RUNTIME_INTRINSICS
Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) + if (Sse2.IsSupported)
Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) + {
Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) + Span<short> output = scratch;
Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff)); fixed (short* p = output)
return paMinusPb <= 0 ? a : b; {
Vector128<byte> a0 = Sse2.ConvertScalarToVector128UInt32(a).AsByte();
Vector128<byte> b0 = Sse2.ConvertScalarToVector128UInt32(b).AsByte();
Vector128<byte> c0 = Sse2.ConvertScalarToVector128UInt32(c).AsByte();
Vector128<byte> ac0 = Sse2.SubtractSaturate(a0, c0);
Vector128<byte> ca0 = Sse2.SubtractSaturate(c0, a0);
Vector128<byte> bc0 = Sse2.SubtractSaturate(b0, c0);
Vector128<byte> cb0 = Sse2.SubtractSaturate(c0, b0);
Vector128<byte> ac = Sse2.Or(ac0, ca0);
Vector128<byte> bc = Sse2.Or(bc0, cb0);
Vector128<byte> pa = Sse2.UnpackLow(ac, Vector128<byte>.Zero); // |a - c|
Vector128<byte> pb = Sse2.UnpackLow(bc, Vector128<byte>.Zero); // |b - c|
Vector128<ushort> diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16());
Sse2.Store((ushort*)p, diff);
int paMinusPb = output[3] + output[2] + output[1] + output[0];
return (paMinusPb <= 0) ? a : b;
}
}
else
#endif
{
int paMinusPb =
Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
return paMinusPb <= 0 ? a : b;
}
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]

6
src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs

@ -15,7 +15,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public uint BgraOrDistance { get; set; } public uint BgraOrDistance { get; set; }
public static PixOrCopy CreateCacheIdx(int idx) => public static PixOrCopy CreateCacheIdx(int idx) =>
new PixOrCopy() new()
{ {
Mode = PixOrCopyMode.CacheIdx, Mode = PixOrCopyMode.CacheIdx,
BgraOrDistance = (uint)idx, BgraOrDistance = (uint)idx,
@ -23,14 +23,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}; };
public static PixOrCopy CreateLiteral(uint bgra) => public static PixOrCopy CreateLiteral(uint bgra) =>
new PixOrCopy() new()
{ {
Mode = PixOrCopyMode.Literal, Mode = PixOrCopyMode.Literal,
BgraOrDistance = bgra, BgraOrDistance = bgra,
Len = 1 Len = 1
}; };
public static PixOrCopy CreateCopy(uint distance, ushort len) => new PixOrCopy() public static PixOrCopy CreateCopy(uint distance, ushort len) => new()
{ {
Mode = PixOrCopyMode.Copy, Mode = PixOrCopyMode.Copy,
BgraOrDistance = distance, BgraOrDistance = distance,

321
src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

@ -5,11 +5,6 @@ using System;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Webp.Lossless namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
/// <summary> /// <summary>
@ -17,6 +12,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
internal static unsafe class PredictorEncoder internal static unsafe class PredictorEncoder
{ {
private static readonly sbyte[][] Offset =
{
new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 }
};
private const int GreenRedToBlueNumAxis = 8; private const int GreenRedToBlueNumAxis = 8;
private const int GreenRedToBlueMaxIters = 7; private const int GreenRedToBlueMaxIters = 7;
@ -29,6 +29,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const int PredLowEffort = 11; private const int PredLowEffort = 11;
// This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<sbyte> DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 };
/// <summary> /// <summary>
/// Finds the best predictor for each tile, and converts the image to residuals /// Finds the best predictor for each tile, and converts the image to residuals
/// with respect to predictions. If nearLosslessQuality &lt; 100, applies /// with respect to predictions. If nearLosslessQuality &lt; 100, applies
@ -41,6 +44,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span<uint> bgra, Span<uint> bgra,
Span<uint> bgraScratch, Span<uint> bgraScratch,
Span<uint> image, Span<uint> image,
int[][] histoArgb,
int[][] bestHisto,
bool nearLossless, bool nearLossless,
int nearLosslessQuality, int nearLosslessQuality,
WebpTransparentColorMode transparentColorMode, WebpTransparentColorMode transparentColorMode,
@ -50,6 +55,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int tilesPerRow = LosslessUtils.SubSampleSize(width, bits); int tilesPerRow = LosslessUtils.SubSampleSize(width, bits);
int tilesPerCol = LosslessUtils.SubSampleSize(height, bits); int tilesPerCol = LosslessUtils.SubSampleSize(height, bits);
int maxQuantization = 1 << LosslessUtils.NearLosslessBits(nearLosslessQuality); int maxQuantization = 1 << LosslessUtils.NearLosslessBits(nearLosslessQuality);
Span<short> scratch = stackalloc short[8];
// TODO: Can we optimize this? // TODO: Can we optimize this?
int[][] histo = new int[4][]; int[][] histo = new int[4][];
@ -80,11 +86,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
histo, histo,
bgraScratch, bgraScratch,
bgra, bgra,
histoArgb,
bestHisto,
maxQuantization, maxQuantization,
transparentColorMode, transparentColorMode,
usedSubtractGreen, usedSubtractGreen,
nearLossless, nearLossless,
image); image,
scratch);
image[(tileY * tilesPerRow) + tileX] = (uint)(WebpConstants.ArgbBlack | (pred << 8)); image[(tileY * tilesPerRow) + tileX] = (uint)(WebpConstants.ArgbBlack | (pred << 8));
} }
@ -105,7 +114,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
lowEffort); lowEffort);
} }
public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span<uint> bgra, Span<uint> image) public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span<uint> bgra, Span<uint> image, Span<int> scratch)
{ {
int maxTileSize = 1 << bits; int maxTileSize = 1 << bits;
int tileXSize = LosslessUtils.SubSampleSize(width, bits); int tileXSize = LosslessUtils.SubSampleSize(width, bits);
@ -139,7 +148,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
height, height,
accumulatedRedHisto, accumulatedRedHisto,
accumulatedBlueHisto, accumulatedBlueHisto,
bgra); bgra,
scratch);
image[offset] = MultipliersToColorCode(prevX); image[offset] = MultipliersToColorCode(prevX);
CopyTileWithColorTransform(width, height, tileXOffset, tileYOffset, maxTileSize, prevX, bgra); CopyTileWithColorTransform(width, height, tileXOffset, tileYOffset, maxTileSize, prevX, bgra);
@ -188,11 +198,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int[][] accumulated, int[][] accumulated,
Span<uint> argbScratch, Span<uint> argbScratch,
Span<uint> argb, Span<uint> argb,
int[][] histoArgb,
int[][] bestHisto,
int maxQuantization, int maxQuantization,
WebpTransparentColorMode transparentColorMode, WebpTransparentColorMode transparentColorMode,
bool usedSubtractGreen, bool usedSubtractGreen,
bool nearLossless, bool nearLossless,
Span<uint> modes) Span<uint> modes,
Span<short> scratch)
{ {
const int numPredModes = 14; const int numPredModes = 14;
int startX = tileX << bits; int startX = tileX << bits;
@ -222,21 +235,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
float bestDiff = MaxDiffCost; float bestDiff = MaxDiffCost;
int bestMode = 0; int bestMode = 0;
uint[] residuals = new uint[1 << WebpConstants.MaxTransformBits]; uint[] residuals = new uint[1 << WebpConstants.MaxTransformBits];
int[][] histoArgb = new int[4][];
int[][] bestHisto = new int[4][];
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
histoArgb[i] = new int[256]; histoArgb[i].AsSpan().Clear();
bestHisto[i] = new int[256]; bestHisto[i].AsSpan().Clear();
} }
for (int mode = 0; mode < numPredModes; mode++) for (int mode = 0; mode < numPredModes; mode++)
{ {
for (int i = 0; i < 4; i++)
{
histoArgb[i].AsSpan().Fill(0);
}
if (startY > 0) if (startY > 0)
{ {
// Read the row above the tile which will become the first upper_row. // Read the row above the tile which will become the first upper_row.
@ -272,7 +278,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals); GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals, scratch);
for (int relativeX = 0; relativeX < maxX; ++relativeX) for (int relativeX = 0; relativeX < maxX; ++relativeX)
{ {
UpdateHisto(histoArgb, residuals[relativeX]); UpdateHisto(histoArgb, residuals[relativeX]);
@ -300,6 +306,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
bestDiff = curDiff; bestDiff = curDiff;
bestMode = mode; bestMode = mode;
} }
for (int i = 0; i < 4; i++)
{
histoArgb[i].AsSpan().Clear();
}
} }
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
@ -333,11 +344,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
WebpTransparentColorMode transparentColorMode, WebpTransparentColorMode transparentColorMode,
bool usedSubtractGreen, bool usedSubtractGreen,
bool nearLossless, bool nearLossless,
Span<uint> output) Span<uint> output,
Span<short> scratch)
{ {
if (transparentColorMode == WebpTransparentColorMode.Preserve) if (transparentColorMode == WebpTransparentColorMode.Preserve)
{ {
PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output); PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output, scratch);
} }
else else
{ {
@ -395,7 +407,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
predict = LosslessUtils.Predictor10(currentRow[x - 1], upperRow + x); predict = LosslessUtils.Predictor10(currentRow[x - 1], upperRow + x);
break; break;
case 11: case 11:
predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x); predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x, scratch);
break; break;
case 12: case 12:
predict = LosslessUtils.Predictor12(currentRow[x - 1], upperRow + x); predict = LosslessUtils.Predictor12(currentRow[x - 1], upperRow + x);
@ -539,19 +551,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (byte)lower; return (byte)lower;
} }
else
{
// upper is closer to residual than lower.
if (residual <= boundaryResidual && upper > boundaryResidual)
{
// Halve quantization step to avoid crossing boundary. This midpoint is
// on the same side of boundary as residual because midpoint <= residual
// (since upper is closer than lower) and residual is below the boundary.
return (byte)(lower + (quantization >> 1));
}
return (byte)(upper & 0xff); // upper is closer to residual than lower.
if (residual <= boundaryResidual && upper > boundaryResidual)
{
// Halve quantization step to avoid crossing boundary. This midpoint is
// on the same side of boundary as residual because midpoint <= residual
// (since upper is closer than lower) and residual is below the boundary.
return (byte)(lower + (quantization >> 1));
} }
return (byte)upper;
} }
/// <summary> /// <summary>
@ -583,6 +593,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span<byte> currentMaxDiffs = MemoryMarshal.Cast<uint, byte>(currentRow.Slice(width + 1)); Span<byte> currentMaxDiffs = MemoryMarshal.Cast<uint, byte>(currentRow.Slice(width + 1));
Span<byte> lowerMaxDiffs = currentMaxDiffs.Slice(width); Span<byte> lowerMaxDiffs = currentMaxDiffs.Slice(width);
Span<short> scratch = stackalloc short[8];
for (int y = 0; y < height; y++) for (int y = 0; y < height; y++)
{ {
Span<uint> tmp32 = upperRow; Span<uint> tmp32 = upperRow;
@ -593,7 +604,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (lowEffort) if (lowEffort)
{ {
PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width)); PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width), scratch);
} }
else else
{ {
@ -634,7 +645,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
transparentColorMode, transparentColorMode,
usedSubtractGreen, usedSubtractGreen,
nearLossless, nearLossless,
argb.Slice((y * width) + x)); argb.Slice((y * width) + x),
scratch);
x = xEnd; x = xEnd;
} }
@ -649,7 +661,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int numPixels, int numPixels,
Span<uint> currentSpan, Span<uint> currentSpan,
Span<uint> upperSpan, Span<uint> upperSpan,
Span<uint> outputSpan) Span<uint> outputSpan,
Span<short> scratch)
{ {
#pragma warning disable SA1503 // Braces should not be omitted #pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* current = currentSpan) fixed (uint* current = currentSpan)
@ -718,7 +731,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
LosslessUtils.PredictorSub10(current + xStart, upper + xStart, numPixels, output); LosslessUtils.PredictorSub10(current + xStart, upper + xStart, numPixels, output);
break; break;
case 11: case 11:
LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output); LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output, scratch);
break; break;
case 12: case 12:
LosslessUtils.PredictorSub12(current + xStart, upper + xStart, numPixels, output); LosslessUtils.PredictorSub12(current + xStart, upper + xStart, numPixels, output);
@ -819,7 +832,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
private static Vp8LMultipliers GetBestColorTransformForTile(int tileX, int tileY, int bits, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int xSize, int ySize, int[] accumulatedRedHisto, int[] accumulatedBlueHisto, Span<uint> argb) private static Vp8LMultipliers GetBestColorTransformForTile(
int tileX,
int tileY,
int bits,
Vp8LMultipliers prevX,
Vp8LMultipliers prevY,
int quality,
int xSize,
int ySize,
int[] accumulatedRedHisto,
int[] accumulatedBlueHisto,
Span<uint> argb,
Span<int> scratch)
{ {
int maxTileSize = 1 << bits; int maxTileSize = 1 << bits;
int tileYOffset = tileY * maxTileSize; int tileYOffset = tileY * maxTileSize;
@ -832,18 +857,28 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
var bestTx = default(Vp8LMultipliers); var bestTx = default(Vp8LMultipliers);
GetBestGreenToRed(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx); GetBestGreenToRed(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx);
GetBestGreenRedToBlue(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx); GetBestGreenRedToBlue(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx);
return bestTx; return bestTx;
} }
private static void GetBestGreenToRed(Span<uint> argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedRedHisto, ref Vp8LMultipliers bestTx) private static void GetBestGreenToRed(
Span<uint> argb,
int stride,
Span<int> scratch,
int tileWidth,
int tileHeight,
Vp8LMultipliers prevX,
Vp8LMultipliers prevY,
int quality,
int[] accumulatedRedHisto,
ref Vp8LMultipliers bestTx)
{ {
int maxIters = 4 + ((7 * quality) >> 8); // in range [4..6] int maxIters = 4 + ((7 * quality) >> 8); // in range [4..6]
int greenToRedBest = 0; int greenToRedBest = 0;
double bestDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto); double bestDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto);
for (int iter = 0; iter < maxIters; iter++) for (int iter = 0; iter < maxIters; iter++)
{ {
// ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
@ -855,7 +890,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
for (int offset = -delta; offset <= delta; offset += 2 * delta) for (int offset = -delta; offset <= delta; offset += 2 * delta)
{ {
int greenToRedCur = offset + greenToRedBest; int greenToRedCur = offset + greenToRedBest;
double curDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto); double curDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto);
if (curDiff < bestDiff) if (curDiff < bestDiff)
{ {
bestDiff = curDiff; bestDiff = curDiff;
@ -867,24 +902,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
bestTx.GreenToRed = (byte)(greenToRedBest & 0xff); bestTx.GreenToRed = (byte)(greenToRedBest & 0xff);
} }
private static void GetBestGreenRedToBlue(Span<uint> argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx) private static void GetBestGreenRedToBlue(Span<uint> argb, int stride, Span<int> scratch, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx)
{ {
int iters = (quality < 25) ? 1 : (quality > 50) ? GreenRedToBlueMaxIters : 4; int iters = (quality < 25) ? 1 : (quality > 50) ? GreenRedToBlueMaxIters : 4;
int greenToBlueBest = 0; int greenToBlueBest = 0;
int redToBlueBest = 0; int redToBlueBest = 0;
sbyte[][] offset = { new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 } };
sbyte[] deltaLut = { 16, 16, 8, 4, 2, 2, 2 };
// Initial value at origin: // Initial value at origin:
double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto); double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto);
for (int iter = 0; iter < iters; iter++) for (int iter = 0; iter < iters; iter++)
{ {
int delta = deltaLut[iter]; int delta = DeltaLut[iter];
for (int axis = 0; axis < GreenRedToBlueNumAxis; axis++) for (int axis = 0; axis < GreenRedToBlueNumAxis; axis++)
{ {
int greenToBlueCur = (offset[axis][0] * delta) + greenToBlueBest; int greenToBlueCur = (Offset[axis][0] * delta) + greenToBlueBest;
int redToBlueCur = (offset[axis][1] * delta) + redToBlueBest; int redToBlueCur = (Offset[axis][1] * delta) + redToBlueBest;
double curDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto); double curDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto);
if (curDiff < bestDiff) if (curDiff < bestDiff)
{ {
bestDiff = curDiff; bestDiff = curDiff;
@ -910,11 +943,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
bestTx.RedToBlue = (byte)(redToBlueBest & 0xff); bestTx.RedToBlue = (byte)(redToBlueBest & 0xff);
} }
private static double GetPredictionCostCrossColorRed(Span<uint> argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToRed, int[] accumulatedRedHisto) private static double GetPredictionCostCrossColorRed(
Span<uint> argb,
int stride,
Span<int> scratch,
int tileWidth,
int tileHeight,
Vp8LMultipliers prevX,
Vp8LMultipliers prevY,
int greenToRed,
int[] accumulatedRedHisto)
{ {
int[] histo = new int[256]; Span<int> histo = scratch.Slice(0, 256);
histo.Clear();
CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo); ColorSpaceTransformUtils.CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo);
double curDiff = PredictionCostCrossColor(accumulatedRedHisto, histo); double curDiff = PredictionCostCrossColor(accumulatedRedHisto, histo);
if ((byte)greenToRed == prevX.GreenToRed) if ((byte)greenToRed == prevX.GreenToRed)
@ -937,11 +980,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return curDiff; return curDiff;
} }
private static double GetPredictionCostCrossColorBlue(Span<uint> argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToBlue, int redToBlue, int[] accumulatedBlueHisto) private static double GetPredictionCostCrossColorBlue(
Span<uint> argb,
int stride,
Span<int> scratch,
int tileWidth,
int tileHeight,
Vp8LMultipliers prevX,
Vp8LMultipliers prevY,
int greenToBlue,
int redToBlue,
int[] accumulatedBlueHisto)
{ {
int[] histo = new int[256]; Span<int> histo = scratch.Slice(0, 256);
histo.Clear();
CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo); ColorSpaceTransformUtils.CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
double curDiff = PredictionCostCrossColor(accumulatedBlueHisto, histo); double curDiff = PredictionCostCrossColor(accumulatedBlueHisto, histo);
if ((byte)greenToBlue == prevX.GreenToBlue) if ((byte)greenToBlue == prevX.GreenToBlue)
{ {
@ -980,155 +1034,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return curDiff; return curDiff;
} }
private static void CollectColorRedTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
{
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
var maskgreen = Vector128.Create(0x00ff00);
var mask = Vector128.Create((short)0xff);
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* src = srcSpan)
fixed (ushort* dst = values)
{
for (int x = 0; x + span <= tileWidth; x += span)
{
uint* input0Idx = src + x;
uint* input1Idx = src + x + (span / 2);
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
Vector128<byte> g0 = Sse2.And(input0, maskgreen.AsByte()); // 0 0 | g 0
Vector128<byte> g1 = Sse2.And(input1, maskgreen.AsByte());
Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector128<byte> d = Sse2.And(c, mask.AsByte()); // 0 r'
Sse2.Store(dst, d.AsUInt16());
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
}
#pragma warning restore SA1503 // Braces should not be omitted
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
}
}
else
#endif
{
CollectColorRedTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToRed, histo);
}
}
private static void CollectColorRedTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo)
{
int pos = 0;
while (tileHeight-- > 0)
{
for (int x = 0; x < tileWidth; x++)
{
int idx = LosslessUtils.TransformColorRed((sbyte)greenToRed, bgra[pos + x]);
++histo[idx];
}
pos += stride;
}
}
/// <summary>
/// Builds a histogram of transformed blue values for one tile.
/// When SSE4.1 is available, eight pixels are processed per iteration and the columns that do not
/// fill a group of eight are handed to <see cref="CollectColorBlueTransformsNoneVectorized"/>;
/// otherwise the whole tile is handled by that scalar method.
/// </summary>
/// <param name="bgra">The pixel data; the tile starts at index 0.</param>
/// <param name="stride">The number of pixels between the start of two consecutive tile rows.</param>
/// <param name="tileWidth">The width of the tile in pixels.</param>
/// <param name="tileHeight">The height of the tile in pixels.</param>
/// <param name="greenToBlue">The green-to-blue multiplier.</param>
/// <param name="redToBlue">The red-to-blue multiplier.</param>
/// <param name="histo">The histogram whose bins are incremented; the vectorized path only produces indices 0..255.</param>
private static void CollectColorBlueTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
{
// Pixels handled per iteration: two 128-bit loads of four 32-bit pixels each.
const int span = 8;
// Scratch that receives the eight 16-bit results of one iteration.
Span<ushort> values = stackalloc ushort[span];
// Multipliers broadcast to all eight 16-bit lanes.
// NOTE(review): Cst5b is defined elsewhere; presumably it pre-scales the multiplier for MultiplyHigh - confirm.
var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
// Keeps the high byte of every 16-bit lane.
var maskgreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
// Keeps bytes 0 and 1 (the low 16 bits) of every 32-bit pixel.
var maskgreenblue = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
// Keeps the low byte of every 16-bit lane.
var maskblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
// Shuffle controls: move byte 2 of each of the four pixels into the high byte of 16-bit lanes 0..3 (Low)
// or lanes 4..7 (High); index 255 zeroes the destination byte.
var shufflerLow = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
var shufflerHigh = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* src = srcSpan)
fixed (ushort* dst = values)
{
// Only full groups of eight pixels are processed here.
for (int x = 0; x + span <= tileWidth; x += span)
{
uint* input0Idx = src + x;
uint* input1Idx = src + x + (span / 2);
// Pixels x..x+3 and x+4..x+7.
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
// Byte 2 of each pixel (red, going by the variable names), placed in the high byte of its 16-bit lane.
Vector128<byte> r0 = Ssse3.Shuffle(input0, shufflerLow);
Vector128<byte> r1 = Ssse3.Shuffle(input1, shufflerHigh);
Vector128<byte> r = Sse2.Or(r0, r1);
// Low 16 bits of each pixel, narrowed to eight 16-bit lanes. The masked values already fit in
// 16 bits, so the saturating pack leaves them unchanged.
Vector128<byte> gb0 = Sse2.And(input0, maskgreenblue);
Vector128<byte> gb1 = Sse2.And(input1, maskgreenblue);
Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
// Byte 1 of each pixel alone, still in the high byte of its lane.
Vector128<byte> g = Sse2.And(gb.AsByte(), maskgreen);
// High 16 bits of the signed 16-bit products with the two multipliers.
Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
// Byte-wise subtraction of both products; the mask then keeps only the low byte of each lane.
Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128<byte> d = Sse2.Subtract(c, a.AsByte());
Vector128<byte> e = Sse2.And(d, maskblue);
Sse2.Store(dst, e.AsUInt16());
// Each stored value is in 0..255 and selects the histogram bin to increment.
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
}
#pragma warning restore SA1503 // Braces should not be omitted
// span is a power of two, so this is tileWidth modulo span: the columns the loop above did not cover.
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
// The scalar method processes the trailing columns of every row, starting at column tileWidth - leftOver.
CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
}
}
else
#endif
{
// No SSE4.1 (or intrinsics not compiled in): process the whole tile with the scalar method.
CollectColorBlueTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
}
}
/// <summary>
/// Scalar histogram collection for the blue channel: every pixel of the tile is passed through
/// <see cref="LosslessUtils.TransformColorBlue"/> and the returned value is used as the index of the
/// histogram bin to increment.
/// </summary>
/// <param name="bgra">The pixel data; the tile starts at index 0.</param>
/// <param name="stride">The number of pixels between the start of two consecutive tile rows.</param>
/// <param name="tileWidth">The number of pixels to visit per row.</param>
/// <param name="tileHeight">The number of rows to visit.</param>
/// <param name="greenToBlue">The green-to-blue multiplier; it is narrowed to <see cref="sbyte"/> before use.</param>
/// <param name="redToBlue">The red-to-blue multiplier; it is narrowed to <see cref="sbyte"/> before use.</param>
/// <param name="histo">The histogram whose bins are incremented.</param>
private static void CollectColorBlueTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo)
{
    for (int row = 0, rowStart = 0; row < tileHeight; row++, rowStart += stride)
    {
        for (int column = 0; column < tileWidth; column++)
        {
            uint pixel = bgra[rowStart + column];
            histo[LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, pixel)]++;
        }
    }
}
private static float PredictionCostSpatialHistogram(int[][] accumulated, int[][] tile) private static float PredictionCostSpatialHistogram(int[][] accumulated, int[][] tile)
{ {
double retVal = 0.0d; double retVal = 0.0d;
@ -1143,7 +1048,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private static double PredictionCostCrossColor(int[] accumulated, int[] counts) private static double PredictionCostCrossColor(int[] accumulated, Span<int> counts)
{ {
// Favor low entropy, locally and globally. // Favor low entropy, locally and globally.
// Favor small absolute values for PredictionCostSpatial. // Favor small absolute values for PredictionCostSpatial.
@ -1152,7 +1057,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private static float PredictionCostSpatial(int[] counts, int weight0, double expVal) private static float PredictionCostSpatial(Span<int> counts, int weight0, double expVal)
{ {
int significantSymbols = 256 >> 4; int significantSymbols = 256 >> 4;
double expDecayFactor = 0.6; double expDecayFactor = 0.6;

37
src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs

@ -19,6 +19,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
internal class Vp8LEncoder : IDisposable internal class Vp8LEncoder : IDisposable
{ {
/// <summary>
/// Scratch buffer to reduce allocations.
/// </summary>
private readonly int[] scratch = new int[256];
private readonly int[][] histoArgb = { new int[256], new int[256], new int[256], new int[256] };
private readonly int[][] bestHisto = { new int[256], new int[256], new int[256], new int[256] };
/// <summary> /// <summary>
/// The <see cref="MemoryAllocator"/> to use for buffer allocations. /// The <see cref="MemoryAllocator"/> to use for buffer allocations.
/// </summary> /// </summary>
@ -128,6 +137,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
// The C# compiler stores this constant byte array in the assembly's static data, so the span refers to it directly without allocating.
private static ReadOnlySpan<byte> Order => new byte[] { 1, 2, 0, 3 };
/// <summary> /// <summary>
/// Gets the memory for the image data as packed bgra values. /// Gets the memory for the image data as packed bgra values.
/// </summary> /// </summary>
@ -234,7 +246,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.EncodeStream(image); this.EncodeStream(image);
// Write bytes from the bitwriter buffer to the stream. // Write bytes from the bitwriter buffer to the stream.
this.bitWriter.WriteEncodedImageToStream(stream, image.Metadata.ExifProfile, (uint)width, (uint)height); this.bitWriter.WriteEncodedImageToStream(stream, image.Metadata.ExifProfile, (uint)width, (uint)height, hasAlpha);
} }
/// <summary> /// <summary>
@ -675,6 +687,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.EncodedData.GetSpan(), this.EncodedData.GetSpan(),
this.BgraScratch.GetSpan(), this.BgraScratch.GetSpan(),
this.TransformData.GetSpan(), this.TransformData.GetSpan(),
this.histoArgb,
this.bestHisto,
this.nearLossless, this.nearLossless,
nearLosslessStrength, nearLosslessStrength,
this.transparentColorMode, this.transparentColorMode,
@ -694,7 +708,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int transformWidth = LosslessUtils.SubSampleSize(width, colorTransformBits); int transformWidth = LosslessUtils.SubSampleSize(width, colorTransformBits);
int transformHeight = LosslessUtils.SubSampleSize(height, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(height, colorTransformBits);
PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan()); PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan(), this.scratch);
this.bitWriter.PutBits(WebpConstants.TransformPresent, 1); this.bitWriter.PutBits(WebpConstants.TransformPresent, 1);
this.bitWriter.PutBits((uint)Vp8LTransformType.CrossColorTransform, 2); this.bitWriter.PutBits((uint)Vp8LTransformType.CrossColorTransform, 2);
@ -736,7 +750,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
var histogramImage = new List<Vp8LHistogram>() var histogramImage = new List<Vp8LHistogram>()
{ {
new Vp8LHistogram(cacheBits) new(cacheBits)
}; };
// Build histogram image and symbols from backward references. // Build histogram image and symbols from backward references.
@ -780,7 +794,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private void StoreHuffmanCode(HuffmanTree[] huffTree, HuffmanTreeToken[] tokens, HuffmanTreeCode huffmanCode) private void StoreHuffmanCode(HuffmanTree[] huffTree, HuffmanTreeToken[] tokens, HuffmanTreeCode huffmanCode)
{ {
int count = 0; int count = 0;
int[] symbols = { 0, 0 }; Span<int> symbols = this.scratch.AsSpan(0, 2);
symbols.Clear();
int maxBits = 8; int maxBits = 8;
int maxSymbol = 1 << maxBits; int maxSymbol = 1 << maxBits;
@ -973,10 +988,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (v.IsLiteral()) if (v.IsLiteral())
{ {
byte[] order = { 1, 2, 0, 3 };
for (int k = 0; k < 4; k++) for (int k = 0; k < 4; k++)
{ {
int code = (int)v.Literal(order[k]); int code = (int)v.Literal(Order[k]);
this.bitWriter.WriteHuffmanCode(codes[k], code); this.bitWriter.WriteHuffmanCode(codes[k], code);
} }
} }
@ -1092,9 +1106,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
histo[(int)HistoIx.HistoBluePred * 256]++; histo[(int)HistoIx.HistoBluePred * 256]++;
histo[(int)HistoIx.HistoAlphaPred * 256]++; histo[(int)HistoIx.HistoAlphaPred * 256]++;
var bitEntropy = new Vp8LBitEntropy();
for (int j = 0; j < (int)HistoIx.HistoTotal; j++) for (int j = 0; j < (int)HistoIx.HistoTotal; j++)
{ {
var bitEntropy = new Vp8LBitEntropy(); bitEntropy.Init();
Span<uint> curHisto = histo.Slice(j * 256, 256); Span<uint> curHisto = histo.Slice(j * 256, 256);
bitEntropy.BitsEntropyUnrefined(curHisto, 256); bitEntropy.BitsEntropyUnrefined(curHisto, 256);
entropyComp[j] = bitEntropy.BitsEntropyRefine(); entropyComp[j] = bitEntropy.BitsEntropyRefine();
@ -1190,9 +1205,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return false; return false;
} }
#if NET5_0_OR_GREATER
var paletteSlice = palette.Slice(0, this.PaletteSize);
paletteSlice.Sort();
#else
uint[] paletteArray = palette.Slice(0, this.PaletteSize).ToArray(); uint[] paletteArray = palette.Slice(0, this.PaletteSize).ToArray();
Array.Sort(paletteArray); Array.Sort(paletteArray);
paletteArray.CopyTo(palette); paletteArray.CopyTo(palette);
#endif
if (PaletteHasNonMonotonousDeltas(palette, this.PaletteSize)) if (PaletteHasNonMonotonousDeltas(palette, this.PaletteSize))
{ {
@ -1447,7 +1467,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
return mid; return mid;
} }
else if (sorted[mid] < color)
if (sorted[mid] < color)
{ {
low = mid; low = mid;
} }

67
src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs

@ -157,29 +157,30 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Estimate how many bits the combined entropy of literals and distance approximately maps to. /// Estimate how many bits the combined entropy of literals and distance approximately maps to.
/// </summary> /// </summary>
/// <returns>Estimated bits.</returns> /// <returns>Estimated bits.</returns>
public double EstimateBits() public double EstimateBits(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{ {
uint notUsed = 0; uint notUsed = 0;
return return
PopulationCost(this.Literal, this.NumCodes(), ref notUsed, ref this.IsUsed[0]) PopulationCost(this.Literal, this.NumCodes(), ref notUsed, ref this.IsUsed[0], stats, bitsEntropy)
+ PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[1]) + PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[1], stats, bitsEntropy)
+ PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[2]) + PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[2], stats, bitsEntropy)
+ PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[3]) + PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[3], stats, bitsEntropy)
+ PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4]) + PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy)
+ ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes)
+ ExtraCost(this.Distance, WebpConstants.NumDistanceCodes); + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes);
} }
public void UpdateHistogramCost() public void UpdateHistogramCost(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{ {
uint alphaSym = 0, redSym = 0, blueSym = 0; uint alphaSym = 0, redSym = 0, blueSym = 0;
uint notUsed = 0; uint notUsed = 0;
double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3]);
double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4]) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes); double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3], stats, bitsEntropy);
double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes);
int numCodes = this.NumCodes(); int numCodes = this.NumCodes();
this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0]) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes); this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0], stats, bitsEntropy) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes);
this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1]); this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1], stats, bitsEntropy);
this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2]); this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2], stats, bitsEntropy);
this.BitCost = this.LiteralCost + this.RedCost + this.BlueCost + alphaCost + distanceCost; this.BitCost = this.LiteralCost + this.RedCost + this.BlueCost + alphaCost + distanceCost;
if ((alphaSym | redSym | blueSym) == NonTrivialSym) if ((alphaSym | redSym | blueSym) == NonTrivialSym)
{ {
@ -198,11 +199,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Since the previous score passed is 'costThreshold', we only need to compare /// Since the previous score passed is 'costThreshold', we only need to compare
/// the partial cost against 'costThreshold + C(a) + C(b)' to possibly bail-out early. /// the partial cost against 'costThreshold + C(a) + C(b)' to possibly bail-out early.
/// </summary> /// </summary>
public double AddEval(Vp8LHistogram b, double costThreshold, Vp8LHistogram output) public double AddEval(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold, Vp8LHistogram output)
{ {
double sumCost = this.BitCost + b.BitCost; double sumCost = this.BitCost + b.BitCost;
costThreshold += sumCost; costThreshold += sumCost;
if (this.GetCombinedHistogramEntropy(b, costThreshold, costInitial: 0, out double cost)) if (this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial: 0, out double cost))
{ {
this.Add(b, output); this.Add(b, output);
output.BitCost = cost; output.BitCost = cost;
@ -212,10 +213,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return cost - sumCost; return cost - sumCost;
} }
public double AddThresh(Vp8LHistogram b, double costThreshold) public double AddThresh(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold)
{ {
double costInitial = -this.BitCost; double costInitial = -this.BitCost;
this.GetCombinedHistogramEntropy(b, costThreshold, costInitial, out double cost); this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial, out double cost);
return cost; return cost;
} }
@ -239,12 +240,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
: NonTrivialSym; : NonTrivialSym;
} }
public bool GetCombinedHistogramEntropy(Vp8LHistogram b, double costThreshold, double costInitial, out double cost) public bool GetCombinedHistogramEntropy(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy, double costThreshold, double costInitial, out double cost)
{ {
bool trivialAtEnd = false; bool trivialAtEnd = false;
cost = costInitial; cost = costInitial;
cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false); cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false, stats, bitEntropy);
cost += ExtraCostCombined(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), b.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes); cost += ExtraCostCombined(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), b.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes);
@ -267,25 +268,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd); cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold) if (cost > costThreshold)
{ {
return false; return false;
} }
cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd); cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold) if (cost > costThreshold)
{ {
return false; return false;
} }
cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd); cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold) if (cost > costThreshold)
{ {
return false; return false;
} }
cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false); cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false, stats, bitEntropy);
if (cost > costThreshold) if (cost > costThreshold)
{ {
return false; return false;
@ -319,7 +320,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else else
{ {
output.Literal.AsSpan(0, literalSize).Fill(0); output.Literal.AsSpan(0, literalSize).Clear();
} }
} }
@ -342,7 +343,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else else
{ {
output.Red.AsSpan(0, size).Fill(0); output.Red.AsSpan(0, size).Clear();
} }
} }
@ -365,7 +366,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else else
{ {
output.Blue.AsSpan(0, size).Fill(0); output.Blue.AsSpan(0, size).Clear();
} }
} }
@ -388,7 +389,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else else
{ {
output.Alpha.AsSpan(0, size).Fill(0); output.Alpha.AsSpan(0, size).Clear();
} }
} }
@ -411,13 +412,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else else
{ {
output.Distance.AsSpan(0, size).Fill(0); output.Distance.AsSpan(0, size).Clear();
} }
} }
private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd) private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy)
{ {
var stats = new Vp8LStreaks(); stats.Clear();
bitEntropy.Init();
if (trivialAtEnd) if (trivialAtEnd)
{ {
// This configuration is due to palettization that transforms an indexed // This configuration is due to palettization that transforms an indexed
@ -435,7 +437,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return stats.FinalHuffmanCost(); return stats.FinalHuffmanCost();
} }
var bitEntropy = new Vp8LBitEntropy();
if (isXUsed) if (isXUsed)
{ {
if (isYUsed) if (isYUsed)
@ -479,10 +480,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// <summary> /// <summary>
/// Get the symbol entropy for the distribution 'population'. /// Get the symbol entropy for the distribution 'population'.
/// </summary> /// </summary>
private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed) private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy)
{ {
var bitEntropy = new Vp8LBitEntropy(); bitEntropy.Init();
var stats = new Vp8LStreaks(); stats.Clear();
bitEntropy.BitsEntropyUnrefined(population, length, stats); bitEntropy.BitsEntropyUnrefined(population, length, stats);
trivialSym = (bitEntropy.NoneZeros == 1) ? bitEntropy.NoneZeroCode : NonTrivialSym; trivialSym = (bitEntropy.NoneZeros == 1) ? bitEntropy.NoneZeroCode : NonTrivialSym;

9
src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs

@ -1,6 +1,8 @@
// Copyright (c) Six Labors. // Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
internal class Vp8LStreaks internal class Vp8LStreaks
@ -28,6 +30,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
public int[][] Streaks { get; } public int[][] Streaks { get; }
/// <summary>
/// Zeroes the counts and the first two streak rows so the instance can be reused.
/// </summary>
public void Clear()
{
    this.Counts.AsSpan().Clear();

    int[][] streaks = this.Streaks;
    for (int row = 0; row < 2; row++)
    {
        streaks[row].AsSpan().Clear();
    }
}
public double FinalHuffmanCost() public double FinalHuffmanCost()
{ {
// The constants in this function are experimental and got rounded from // The constants in this function are experimental and got rounded from

3
src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs

@ -418,6 +418,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
var huffmanTables = new HuffmanCode[numHTreeGroups * tableSize]; var huffmanTables = new HuffmanCode[numHTreeGroups * tableSize];
var hTreeGroups = new HTreeGroup[numHTreeGroups]; var hTreeGroups = new HTreeGroup[numHTreeGroups];
Span<HuffmanCode> huffmanTable = huffmanTables.AsSpan(); Span<HuffmanCode> huffmanTable = huffmanTables.AsSpan();
int[] codeLengths = new int[maxAlphabetSize];
for (int i = 0; i < numHTreeGroupsMax; i++) for (int i = 0; i < numHTreeGroupsMax; i++)
{ {
hTreeGroups[i] = new HTreeGroup(HuffmanUtils.HuffmanPackedTableSize); hTreeGroups[i] = new HTreeGroup(HuffmanUtils.HuffmanPackedTableSize);
@ -425,7 +426,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int totalSize = 0; int totalSize = 0;
bool isTrivialLiteral = true; bool isTrivialLiteral = true;
int maxBits = 0; int maxBits = 0;
int[] codeLengths = new int[maxAlphabetSize]; codeLengths.AsSpan().Clear();
for (int j = 0; j < WebpConstants.HuffmanCodesPerMetaCode; j++) for (int j = 0; j < WebpConstants.HuffmanCodesPerMetaCode; j++)
{ {
int alphabetSize = WebpConstants.AlphabetSize[j]; int alphabetSize = WebpConstants.AlphabetSize[j];

382
src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs

@ -4,23 +4,78 @@
using System; using System;
using System.Buffers.Binary; using System.Buffers.Binary;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
// ReSharper disable InconsistentNaming // ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Formats.Webp.Lossy namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
internal static class LossyUtils internal static class LossyUtils
{ {
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector128<byte> Mean16x4Mask = Vector128.Create((short)0x00ff).AsByte();
#endif
// Note: this method is named VP8SSE16x16 in the libwebp reference implementation.
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8Sse16X16(Span<byte> a, Span<byte> b) => GetSse(a, b, 16, 16); public static int Vp8_Sse16X16(Span<byte> a, Span<byte> b) => Vp8_SseNxN(a, b, 16, 16);
// Note: this method is named VP8SSE16x8 in the libwebp reference implementation.
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8Sse16X8(Span<byte> a, Span<byte> b) => GetSse(a, b, 16, 8); public static int Vp8_Sse16X8(Span<byte> a, Span<byte> b) => Vp8_SseNxN(a, b, 16, 8);
// Note: this method is named VP8SSE4x4 in the libwebp reference implementation.
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8Sse4X4(Span<byte> a, Span<byte> b) => GetSse(a, b, 4, 4); public static int Vp8_Sse4X4(Span<byte> a, Span<byte> b)
{
    // Returns the sum of squared differences between two 4x4 blocks whose rows are
    // WebpConstants.Bps bytes apart in 'a' and 'b'.
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Sse2.IsSupported)
    {
        // Load values. Each load reads a full 16 byte vector although only the first 4 bytes of a row are used.
        // NOTE(review): this relies on both spans extending at least 16 bytes past the start of the last row - confirm with callers.
        ref byte aRef = ref MemoryMarshal.GetReference(a);
        Vector128<byte> a0 = Unsafe.As<byte, Vector128<byte>>(ref aRef);
        Vector128<byte> a1 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aRef, WebpConstants.Bps));
        Vector128<byte> a2 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 2));
        Vector128<byte> a3 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 3));
        ref byte bRef = ref MemoryMarshal.GetReference(b);
        Vector128<byte> b0 = Unsafe.As<byte, Vector128<byte>>(ref bRef);
        Vector128<byte> b1 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bRef, WebpConstants.Bps));
        Vector128<byte> b2 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bRef, WebpConstants.Bps * 2));
        Vector128<byte> b3 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bRef, WebpConstants.Bps * 3));

        // Combine pair of lines: the low 8 bytes now hold the 4 pixels of two consecutive rows.
        Vector128<int> a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32());
        Vector128<int> a23 = Sse2.UnpackLow(a2.AsInt32(), a3.AsInt32());
        Vector128<int> b01 = Sse2.UnpackLow(b0.AsInt32(), b1.AsInt32());
        Vector128<int> b23 = Sse2.UnpackLow(b2.AsInt32(), b3.AsInt32());

        // Convert to 16b by interleaving with zero, giving eight lanes in the range 0..255.
        Vector128<short> a01s = Sse2.UnpackLow(a01.AsByte(), Vector128<byte>.Zero).AsInt16();
        Vector128<short> a23s = Sse2.UnpackLow(a23.AsByte(), Vector128<byte>.Zero).AsInt16();
        Vector128<short> b01s = Sse2.UnpackLow(b01.AsByte(), Vector128<byte>.Zero).AsInt16();
        Vector128<short> b23s = Sse2.UnpackLow(b23.AsByte(), Vector128<byte>.Zero).AsInt16();

        // Subtract, square and accumulate.
        // The subtraction must be done on signed 16 bit lanes: an unsigned byte subtraction clamps every
        // negative difference to zero, which drops the error of all pixels where a < b.
        // The differences are within -255..255, so the saturating subtract never actually saturates.
        Vector128<short> d0 = Sse2.SubtractSaturate(a01s, b01s);
        Vector128<short> d1 = Sse2.SubtractSaturate(a23s, b23s);
        Vector128<int> e0 = Sse2.MultiplyAddAdjacent(d0, d0);
        Vector128<int> e1 = Sse2.MultiplyAddAdjacent(d1, d1);
        Vector128<int> sum = Sse2.Add(e0, e1);

        return Numerics.ReduceSum(sum);
    }
    else
#endif
    {
        return Vp8_SseNxN(a, b, 4, 4);
    }
}
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static int GetSse(Span<byte> a, Span<byte> b, int w, int h) public static int Vp8_SseNxN(Span<byte> a, Span<byte> b, int w, int h)
{ {
int count = 0; int count = 0;
int aOffset = 0; int aOffset = 0;
@ -58,14 +113,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8Disto16X16(Span<byte> a, Span<byte> b, Span<ushort> w) public static int Vp8Disto16X16(Span<byte> a, Span<byte> b, Span<ushort> w, Span<int> scratch)
{ {
int d = 0; int d = 0;
int dataSize = (4 * WebpConstants.Bps) - 16;
for (int y = 0; y < 16 * WebpConstants.Bps; y += 4 * WebpConstants.Bps) for (int y = 0; y < 16 * WebpConstants.Bps; y += 4 * WebpConstants.Bps)
{ {
for (int x = 0; x < 16; x += 4) for (int x = 0; x < 16; x += 4)
{ {
d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w); d += Vp8Disto4X4(a.Slice(x + y, dataSize), b.Slice(x + y, dataSize), w, scratch);
} }
} }
@ -73,11 +129,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8Disto4X4(Span<byte> a, Span<byte> b, Span<ushort> w) public static int Vp8Disto4X4(Span<byte> a, Span<byte> b, Span<ushort> w, Span<int> scratch)
{ {
int sum1 = TTransform(a, w); #if SUPPORTS_RUNTIME_INTRINSICS
int sum2 = TTransform(b, w); if (Sse41.IsSupported)
return Math.Abs(sum2 - sum1) >> 5; {
int diffSum = TTransformSse41(a, b, w);
return Math.Abs(diffSum) >> 5;
}
else
#endif
{
int sum1 = TTransform(a, w, scratch);
int sum2 = TTransform(b, w, scratch);
return Math.Abs(sum2 - sum1) >> 5;
}
} }
public static void DC16(Span<byte> dst, Span<byte> yuv, int offset) public static void DC16(Span<byte> dst, Span<byte> yuv, int offset)
@ -252,18 +318,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static void TM4(Span<byte> dst, Span<byte> yuv, int offset) => TrueMotion(dst, yuv, offset, 4); public static void TM4(Span<byte> dst, Span<byte> yuv, int offset) => TrueMotion(dst, yuv, offset, 4);
public static void VE4(Span<byte> dst, Span<byte> yuv, int offset) public static void VE4(Span<byte> dst, Span<byte> yuv, int offset, Span<byte> vals)
{ {
// vertical // vertical
int topOffset = offset - WebpConstants.Bps; int topOffset = offset - WebpConstants.Bps;
byte[] vals = vals[0] = Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]);
{ vals[1] = Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]);
Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]), vals[2] = Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]);
Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]), vals[3] = Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4]);
Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]),
Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4])
};
int endIdx = 4 * WebpConstants.Bps; int endIdx = 4 * WebpConstants.Bps;
for (int i = 0; i < endIdx; i += WebpConstants.Bps) for (int i = 0; i < endIdx; i += WebpConstants.Bps)
{ {
@ -504,9 +566,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// <summary> /// <summary>
/// Paragraph 14.3: Implementation of the Walsh-Hadamard transform inversion. /// Paragraph 14.3: Implementation of the Walsh-Hadamard transform inversion.
/// </summary> /// </summary>
public static void TransformWht(Span<short> input, Span<short> output) public static void TransformWht(Span<short> input, Span<short> output, Span<int> scratch)
{ {
int[] tmp = new int[16]; Span<int> tmp = scratch.Slice(0, 16);
tmp.Clear();
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
int iPlus4 = 4 + i; int iPlus4 = 4 + i;
@ -544,10 +607,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// Returns the weighted sum of the absolute value of transformed coefficients. /// Returns the weighted sum of the absolute value of transformed coefficients.
/// w[] contains a row-major 4 by 4 symmetric matrix. /// w[] contains a row-major 4 by 4 symmetric matrix.
/// </summary> /// </summary>
public static int TTransform(Span<byte> input, Span<ushort> w) public static int TTransform(Span<byte> input, Span<ushort> w, Span<int> scratch)
{ {
int sum = 0; int sum = 0;
int[] tmp = new int[16]; Span<int> tmp = scratch.Slice(0, 16);
tmp.Clear();
// horizontal pass. // horizontal pass.
int inputOffset = 0; int inputOffset = 0;
@ -591,15 +655,148 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return sum; return sum;
} }
public static void TransformTwo(Span<short> src, Span<byte> dst) #if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// SSE4.1 version of the Hadamard transform which handles two 4x4 blocks in parallel.
/// For each block the absolute values of the transformed coefficients are weighted with w[] and summed up.
/// The returned value is the weighted sum of <paramref name="inputA"/> minus the weighted sum of <paramref name="inputB"/>.
/// w[] contains a row-major 4 by 4 symmetric matrix.
/// </summary>
/// <param name="inputA">The first block. Rows are <see cref="WebpConstants.Bps"/> bytes apart.</param>
/// <param name="inputB">The second block. Rows are <see cref="WebpConstants.Bps"/> bytes apart.</param>
/// <param name="w">The 16 weights.</param>
/// <returns>The difference of the two weighted sums.</returns>
public static int TTransformSse41(Span<byte> inputA, Span<byte> inputB, Span<ushort> w)
{
    // Read the four rows of each block. A full 16 byte vector is read per row,
    // but only the first 4 bytes of every row take part in the transform.
    Vector128<int> rowA0 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(inputA)).AsInt32();
    Vector128<int> rowA1 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps, 16))).AsInt32();
    Vector128<int> rowA2 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps * 2, 16))).AsInt32();
    Vector128<int> rowA3 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps * 3, 16))).AsInt32();
    Vector128<int> rowB0 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(inputB)).AsInt32();
    Vector128<int> rowB1 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps, 16))).AsInt32();
    Vector128<int> rowB2 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps * 2, 16))).AsInt32();
    Vector128<int> rowB3 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps * 3, 16))).AsInt32();

    // Put the first 4 bytes of the A row and of the B row next to each other
    // and widen them to 16 bit, so both transforms can be done at once.
    Vector128<short> row0 = Sse41.ConvertToVector128Int16(Sse2.UnpackLow(rowA0, rowB0).AsByte());
    Vector128<short> row1 = Sse41.ConvertToVector128Int16(Sse2.UnpackLow(rowA1, rowB1).AsByte());
    Vector128<short> row2 = Sse41.ConvertToVector128Int16(Sse2.UnpackLow(rowA2, rowB2).AsByte());
    Vector128<short> row3 = Sse41.ConvertToVector128Int16(Sse2.UnpackLow(rowA3, rowB3).AsByte());

    // a00 a01 a02 a03   b00 b01 b02 b03
    // a10 a11 a12 a13   b10 b11 b12 b13
    // a20 a21 a22 a23   b20 b21 b22 b23
    // a30 a31 a32 a33   b30 b31 b32 b33

    // The vertical pass is done first: it needs no transpose, and swapping the
    // vertical and horizontal passes is allowed because w/kWeightY is symmetric.
    Vector128<short> sum02 = Sse2.Add(row0, row2);
    Vector128<short> sum13 = Sse2.Add(row1, row3);
    Vector128<short> diff13 = Sse2.Subtract(row1, row3);
    Vector128<short> diff02 = Sse2.Subtract(row0, row2);
    Vector128<short> vertical0 = Sse2.Add(sum02, sum13);
    Vector128<short> vertical1 = Sse2.Add(diff02, diff13);
    Vector128<short> vertical2 = Sse2.Subtract(diff02, diff13);
    Vector128<short> vertical3 = Sse2.Subtract(sum02, sum13);

    // Transpose the two 4x4, so the horizontal pass can work on whole registers as well.
    Vp8Transpose_2_4x4_16b(
        vertical0,
        vertical1,
        vertical2,
        vertical3,
        out Vector128<long> transposed0,
        out Vector128<long> transposed1,
        out Vector128<long> transposed2,
        out Vector128<long> transposed3);

    // a00 a10 a20 a30   b00 b10 b20 b30
    // a01 a11 a21 a31   b01 b11 b21 b31
    // a02 a12 a22 a32   b02 b12 b22 b32
    // a03 a13 a23 a33   b03 b13 b23 b33

    // The weights: entries 0..7 and entries 8..15.
    Vector128<short> weightsLo = Unsafe.As<ushort, Vector128<ushort>>(ref MemoryMarshal.GetReference(w)).AsInt16();
    Vector128<short> weightsHi = Unsafe.As<ushort, Vector128<ushort>>(ref MemoryMarshal.GetReference(w.Slice(8, 8))).AsInt16();

    // Horizontal pass, again for both 4x4 at once.
    Vector128<short> col0 = transposed0.AsInt16();
    Vector128<short> col1 = transposed1.AsInt16();
    Vector128<short> col2 = transposed2.AsInt16();
    Vector128<short> col3 = transposed3.AsInt16();
    sum02 = Sse2.Add(col0, col2);
    sum13 = Sse2.Add(col1, col3);
    diff13 = Sse2.Subtract(col1, col3);
    diff02 = Sse2.Subtract(col0, col2);
    Vector128<long> horizontal0 = Sse2.Add(sum02, sum13).AsInt64();
    Vector128<long> horizontal1 = Sse2.Add(diff02, diff13).AsInt64();
    Vector128<long> horizontal2 = Sse2.Subtract(diff02, diff13).AsInt64();
    Vector128<long> horizontal3 = Sse2.Subtract(sum02, sum13).AsInt64();

    // The lower halves hold the coefficients of inputA, the upper halves those of inputB.
    // Split them up and take the absolute values.
    Vector128<short> absA0 = Ssse3.Abs(Sse2.UnpackLow(horizontal0, horizontal1).AsInt16()).AsInt16();
    Vector128<short> absA8 = Ssse3.Abs(Sse2.UnpackLow(horizontal2, horizontal3).AsInt16()).AsInt16();
    Vector128<short> absB0 = Ssse3.Abs(Sse2.UnpackHigh(horizontal0, horizontal1).AsInt16()).AsInt16();
    Vector128<short> absB8 = Ssse3.Abs(Sse2.UnpackHigh(horizontal2, horizontal3).AsInt16()).AsInt16();

    // Weighted sum of each block.
    Vector128<int> weightedA = Sse2.Add(
        Sse2.MultiplyAddAdjacent(absA0, weightsLo),
        Sse2.MultiplyAddAdjacent(absA8, weightsHi));
    Vector128<int> weightedB = Sse2.Add(
        Sse2.MultiplyAddAdjacent(absB0, weightsLo),
        Sse2.MultiplyAddAdjacent(absB8, weightsHi));

    // Difference of the weighted sums, reduced to a single value.
    return Numerics.ReduceSum(Sse2.Subtract(weightedA, weightedB));
}
// Transposes two 4x4 matrices of 16 bit values.
// The matrices are stored side by side: each input register holds one row of the first matrix (a)
// in its lower half and the same row of the second matrix (b) in its upper half.
// The outputs use the same side by side layout for the transposed matrices.
[MethodImpl(InliningOptions.ShortMethod)]
public static void Vp8Transpose_2_4x4_16b(Vector128<short> b0, Vector128<short> b1, Vector128<short> b2, Vector128<short> b3, out Vector128<long> output0, out Vector128<long> output1, out Vector128<long> output2, out Vector128<long> output3)
{
    // Input:
    // a00 a01 a02 a03   b00 b01 b02 b03
    // a10 a11 a12 a13   b10 b11 b12 b13
    // a20 a21 a22 a23   b20 b21 b22 b23
    // a30 a31 a32 a33   b30 b31 b32 b33

    // Step 1: interleave the 16 bit values of rows 0/1 and of rows 2/3.
    Vector128<int> rows01A = Sse2.UnpackLow(b0, b1).AsInt32();
    Vector128<int> rows23A = Sse2.UnpackLow(b2, b3).AsInt32();
    Vector128<int> rows01B = Sse2.UnpackHigh(b0, b1).AsInt32();
    Vector128<int> rows23B = Sse2.UnpackHigh(b2, b3).AsInt32();

    // a00 a10 a01 a11   a02 a12 a03 a13
    // a20 a30 a21 a31   a22 a32 a23 a33
    // b00 b10 b01 b11   b02 b12 b03 b13
    // b20 b30 b21 b31   b22 b32 b23 b33

    // Step 2: interleave the 32 bit pairs, which completes the columns of each matrix.
    Vector128<long> columns01A = Sse2.UnpackLow(rows01A, rows23A).AsInt64();
    Vector128<long> columns01B = Sse2.UnpackLow(rows01B, rows23B).AsInt64();
    Vector128<long> columns23A = Sse2.UnpackHigh(rows01A, rows23A).AsInt64();
    Vector128<long> columns23B = Sse2.UnpackHigh(rows01B, rows23B).AsInt64();

    // a00 a10 a20 a30   a01 a11 a21 a31
    // b00 b10 b20 b30   b01 b11 b21 b31
    // a02 a12 a22 a32   a03 a13 a23 a33
    // b02 b12 b22 b32   b03 b13 b23 b33

    // Step 3: combine the 64 bit halves, so every register holds the same column of a and b.
    output0 = Sse2.UnpackLow(columns01A, columns01B);
    output1 = Sse2.UnpackHigh(columns01A, columns01B);
    output2 = Sse2.UnpackLow(columns23A, columns23B);
    output3 = Sse2.UnpackHigh(columns23A, columns23B);

    // Output:
    // a00 a10 a20 a30   b00 b10 b20 b30
    // a01 a11 a21 a31   b01 b11 b21 b31
    // a02 a12 a22 a32   b02 b12 b22 b32
    // a03 a13 a23 a33   b03 b13 b23 b33
}
#endif
public static void TransformTwo(Span<short> src, Span<byte> dst, Span<int> scratch)
{ {
TransformOne(src, dst); TransformOne(src, dst, scratch);
TransformOne(src.Slice(16), dst.Slice(4)); TransformOne(src.Slice(16), dst.Slice(4), scratch);
} }
public static void TransformOne(Span<short> src, Span<byte> dst) public static void TransformOne(Span<short> src, Span<byte> dst, Span<int> scratch)
{ {
Span<int> tmp = stackalloc int[4 * 4]; Span<int> tmp = scratch.Slice(0, 16);
int tmpOffset = 0; int tmpOffset = 0;
for (int srcOffset = 0; srcOffset < 4; srcOffset++) for (int srcOffset = 0; srcOffset < 4; srcOffset++)
{ {
@ -671,10 +868,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Store2(dst, 3, a - d4, d1, c1); Store2(dst, 3, a - d4, d1, c1);
} }
public static void TransformUv(Span<short> src, Span<byte> dst) public static void TransformUv(Span<short> src, Span<byte> dst, Span<int> scratch)
{ {
TransformTwo(src.Slice(0 * 16), dst); TransformTwo(src.Slice(0 * 16), dst, scratch);
TransformTwo(src.Slice(2 * 16), dst.Slice(4 * WebpConstants.Bps)); TransformTwo(src.Slice(2 * 16), dst.Slice(4 * WebpConstants.Bps), scratch);
} }
public static void TransformDcuv(Span<short> src, Span<byte> dst) public static void TransformDcuv(Span<short> src, Span<byte> dst)
@ -802,26 +999,55 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
FilterLoop24(v, offsetPlus4, 1, stride, 8, thresh, ithresh, hevThresh); FilterLoop24(v, offsetPlus4, 1, stride, 8, thresh, ithresh, hevThresh);
} }
[MethodImpl(InliningOptions.ShortMethod)] public static void Mean16x4(Span<byte> input, Span<uint> dc)
public static uint LoadUv(byte u, byte v) =>
(uint)(u | (v << 16)); // We process u and v together stashed into 32bit(16bit each).
[MethodImpl(InliningOptions.ShortMethod)]
public static void YuvToBgr(int y, int u, int v, Span<byte> bgr)
{ {
bgr[0] = (byte)YuvToB(y, u); #if SUPPORTS_RUNTIME_INTRINSICS
bgr[1] = (byte)YuvToG(y, u, v); if (Ssse3.IsSupported)
bgr[2] = (byte)YuvToR(y, v); {
} Vector128<byte> a0 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(input));
Vector128<byte> a1 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps, 16)));
[MethodImpl(InliningOptions.ShortMethod)] Vector128<byte> a2 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps * 2, 16)));
public static int YuvToB(int y, int u) => Clip8(MultHi(y, 19077) + MultHi(u, 33050) - 17685); Vector128<byte> a3 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps * 3, 16)));
Vector128<short> b0 = Sse2.ShiftRightLogical(a0.AsInt16(), 8); // hi byte
[MethodImpl(InliningOptions.ShortMethod)] Vector128<short> b1 = Sse2.ShiftRightLogical(a1.AsInt16(), 8);
public static int YuvToG(int y, int u, int v) => Clip8(MultHi(y, 19077) - MultHi(u, 6419) - MultHi(v, 13320) + 8708); Vector128<short> b2 = Sse2.ShiftRightLogical(a2.AsInt16(), 8);
Vector128<short> b3 = Sse2.ShiftRightLogical(a3.AsInt16(), 8);
Vector128<byte> c0 = Sse2.And(a0, Mean16x4Mask); // lo byte
Vector128<byte> c1 = Sse2.And(a1, Mean16x4Mask);
Vector128<byte> c2 = Sse2.And(a2, Mean16x4Mask);
Vector128<byte> c3 = Sse2.And(a3, Mean16x4Mask);
Vector128<int> d0 = Sse2.Add(b0.AsInt32(), c0.AsInt32());
Vector128<int> d1 = Sse2.Add(b1.AsInt32(), c1.AsInt32());
Vector128<int> d2 = Sse2.Add(b2.AsInt32(), c2.AsInt32());
Vector128<int> d3 = Sse2.Add(b3.AsInt32(), c3.AsInt32());
Vector128<int> e0 = Sse2.Add(d0, d1);
Vector128<int> e1 = Sse2.Add(d2, d3);
Vector128<int> f0 = Sse2.Add(e0, e1);
Vector128<short> hadd = Ssse3.HorizontalAdd(f0.AsInt16(), f0.AsInt16());
Vector128<uint> wide = Sse2.UnpackLow(hadd, Vector128<short>.Zero).AsUInt32();
ref uint outputRef = ref MemoryMarshal.GetReference(dc);
Unsafe.As<uint, Vector128<uint>>(ref outputRef) = wide;
}
else
#endif
{
for (int k = 0; k < 4; k++)
{
uint avg = 0;
for (int y = 0; y < 4; y++)
{
for (int x = 0; x < 4; x++)
{
avg += input[x + (y * WebpConstants.Bps)];
}
}
[MethodImpl(InliningOptions.ShortMethod)] dc[k] = avg;
public static int YuvToR(int y, int v) => Clip8(MultHi(y, 19077) + MultHi(v, 26149) - 14234); input = input.Slice(4); // go to next 4x4 block.
}
}
}
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static byte Avg2(byte a, byte b) => (byte)((a + b + 1) >> 1); public static byte Avg2(byte a, byte b) => (byte)((a + b + 1) >> 1);
@ -934,11 +1160,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int p0 = p[offset - step]; int p0 = p[offset - step];
int q0 = p[offset]; int q0 = p[offset];
int q1 = p[offset + step]; int q1 = p[offset + step];
int a = (3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1]; int a = (3 * (q0 - p0)) + WebpLookupTables.Sclip1(p1 - q1);
int a1 = WebpLookupTables.Sclip2[(a + 4) >> 3]; int a1 = WebpLookupTables.Sclip2((a + 4) >> 3);
int a2 = WebpLookupTables.Sclip2[(a + 3) >> 3]; int a2 = WebpLookupTables.Sclip2((a + 3) >> 3);
p[offset - step] = WebpLookupTables.Clip1[p0 + a2]; p[offset - step] = WebpLookupTables.Clip1(p0 + a2);
p[offset] = WebpLookupTables.Clip1[q0 - a1]; p[offset] = WebpLookupTables.Clip1(q0 - a1);
} }
private static void DoFilter4(Span<byte> p, int offset, int step) private static void DoFilter4(Span<byte> p, int offset, int step)
@ -950,13 +1176,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int q0 = p[offset]; int q0 = p[offset];
int q1 = p[offset + step]; int q1 = p[offset + step];
int a = 3 * (q0 - p0); int a = 3 * (q0 - p0);
int a1 = WebpLookupTables.Sclip2[(a + 4) >> 3]; int a1 = WebpLookupTables.Sclip2((a + 4) >> 3);
int a2 = WebpLookupTables.Sclip2[(a + 3) >> 3]; int a2 = WebpLookupTables.Sclip2((a + 3) >> 3);
int a3 = (a1 + 1) >> 1; int a3 = (a1 + 1) >> 1;
p[offsetMinus2Step] = WebpLookupTables.Clip1[p1 + a3]; p[offsetMinus2Step] = WebpLookupTables.Clip1(p1 + a3);
p[offset - step] = WebpLookupTables.Clip1[p0 + a2]; p[offset - step] = WebpLookupTables.Clip1(p0 + a2);
p[offset] = WebpLookupTables.Clip1[q0 - a1]; p[offset] = WebpLookupTables.Clip1(q0 - a1);
p[offset + step] = WebpLookupTables.Clip1[q1 - a3]; p[offset + step] = WebpLookupTables.Clip1(q1 - a3);
} }
private static void DoFilter6(Span<byte> p, int offset, int step) private static void DoFilter6(Span<byte> p, int offset, int step)
@ -971,18 +1197,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int q0 = p[offset]; int q0 = p[offset];
int q1 = p[offset + step]; int q1 = p[offset + step];
int q2 = p[offset + step2]; int q2 = p[offset + step2];
int a = WebpLookupTables.Sclip1[(3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1]]; int a = WebpLookupTables.Sclip1((3 * (q0 - p0)) + WebpLookupTables.Sclip1(p1 - q1));
// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9] // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
int a1 = ((27 * a) + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 int a1 = ((27 * a) + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
int a2 = ((18 * a) + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 int a2 = ((18 * a) + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
int a3 = ((9 * a) + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 int a3 = ((9 * a) + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
p[offset - step3] = WebpLookupTables.Clip1[p2 + a3]; p[offset - step3] = WebpLookupTables.Clip1(p2 + a3);
p[offset - step2] = WebpLookupTables.Clip1[p1 + a2]; p[offset - step2] = WebpLookupTables.Clip1(p1 + a2);
p[offsetMinusStep] = WebpLookupTables.Clip1[p0 + a1]; p[offsetMinusStep] = WebpLookupTables.Clip1(p0 + a1);
p[offset] = WebpLookupTables.Clip1[q0 - a1]; p[offset] = WebpLookupTables.Clip1(q0 - a1);
p[offset + step] = WebpLookupTables.Clip1[q1 - a2]; p[offset + step] = WebpLookupTables.Clip1(q1 - a2);
p[offset + step2] = WebpLookupTables.Clip1[q2 - a3]; p[offset + step2] = WebpLookupTables.Clip1(q2 - a3);
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
@ -992,7 +1218,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int p0 = p[offset - step]; int p0 = p[offset - step];
int q0 = p[offset]; int q0 = p[offset];
int q1 = p[offset + step]; int q1 = p[offset + step];
return (4 * WebpLookupTables.Abs0[p0 - q0]) + WebpLookupTables.Abs0[p1 - q1] <= t; return (4 * WebpLookupTables.Abs0(p0 - q0)) + WebpLookupTables.Abs0(p1 - q1) <= t;
} }
private static bool NeedsFilter2(Span<byte> p, int offset, int step, int t, int it) private static bool NeedsFilter2(Span<byte> p, int offset, int step, int t, int it)
@ -1007,14 +1233,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int q1 = p[offset + step]; int q1 = p[offset + step];
int q2 = p[offset + step2]; int q2 = p[offset + step2];
int q3 = p[offset + step3]; int q3 = p[offset + step3];
if ((4 * WebpLookupTables.Abs0[p0 - q0]) + WebpLookupTables.Abs0[p1 - q1] > t) if ((4 * WebpLookupTables.Abs0(p0 - q0)) + WebpLookupTables.Abs0(p1 - q1) > t)
{ {
return false; return false;
} }
return WebpLookupTables.Abs0[p3 - p2] <= it && WebpLookupTables.Abs0[p2 - p1] <= it && return WebpLookupTables.Abs0(p3 - p2) <= it && WebpLookupTables.Abs0(p2 - p1) <= it &&
WebpLookupTables.Abs0[p1 - p0] <= it && WebpLookupTables.Abs0[q3 - q2] <= it && WebpLookupTables.Abs0(p1 - p0) <= it && WebpLookupTables.Abs0(q3 - q2) <= it &&
WebpLookupTables.Abs0[q2 - q1] <= it && WebpLookupTables.Abs0[q1 - q0] <= it; WebpLookupTables.Abs0(q2 - q1) <= it && WebpLookupTables.Abs0(q1 - q0) <= it;
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
@ -1024,12 +1250,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int p0 = p[offset - step]; int p0 = p[offset - step];
int q0 = p[offset]; int q0 = p[offset];
int q1 = p[offset + step]; int q1 = p[offset + step];
return WebpLookupTables.Abs0[p1 - p0] > thresh || WebpLookupTables.Abs0[q1 - q0] > thresh; return WebpLookupTables.Abs0(p1 - p0) > thresh || WebpLookupTables.Abs0(q1 - q0) > thresh;
} }
[MethodImpl(InliningOptions.ShortMethod)]
private static int MultHi(int v, int coeff) => (v * coeff) >> 8;
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private static void Store(Span<byte> dst, int x, int y, int v) private static void Store(Span<byte> dst, int x, int y, int v)
{ {
@ -1052,13 +1275,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private static int Mul2(int a) => (a * 35468) >> 16; private static int Mul2(int a) => (a * 35468) >> 16;
[MethodImpl(InliningOptions.ShortMethod)]
private static byte Clip8(int v)
{
int yuvMask = (256 << 6) - 1;
return (byte)((v & ~yuvMask) == 0 ? v >> 6 : v < 0 ? 0 : 255);
}
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private static void Put8x8uv(byte value, Span<byte> dst) private static void Put8x8uv(byte value, Span<byte> dst)
{ {

292
src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs

@ -3,13 +3,18 @@
using System; using System;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Webp.Lossy namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
/// <summary> /// <summary>
/// Quantization methods. /// Quantization methods.
/// </summary> /// </summary>
internal static class QuantEnc internal static unsafe class QuantEnc
{ {
private static readonly byte[] Zigzag = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; private static readonly byte[] Zigzag = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 };
@ -17,6 +22,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
private const int MaxLevel = 2047; private const int MaxLevel = 2047;
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector128<short> MaxCoeff2047 = Vector128.Create((short)MaxLevel);
private static readonly Vector128<byte> CstLo = Vector128.Create(0, 1, 2, 3, 8, 9, 254, 255, 10, 11, 4, 5, 6, 7, 12, 13);
private static readonly Vector128<byte> Cst7 = Vector128.Create(254, 255, 254, 255, 254, 255, 254, 255, 14, 15, 254, 255, 254, 255, 254, 255);
private static readonly Vector128<byte> CstHi = Vector128.Create(2, 3, 8, 9, 10, 11, 4, 5, 254, 255, 6, 7, 12, 13, 14, 15);
private static readonly Vector128<byte> Cst8 = Vector128.Create(254, 255, 254, 255, 254, 255, 0, 1, 254, 255, 254, 255, 254, 255, 254, 255);
#endif
// Diffusion weights. We under-correct a bit (15/16th of the error is actually // Diffusion weights. We under-correct a bit (15/16th of the error is actually
// diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0. // diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0.
private const int C1 = 7; // fraction of error sent to the 4x4 block below private const int C1 = 7; // fraction of error sent to the 4x4 block below
@ -31,7 +48,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int lambda = dqm.LambdaI16; int lambda = dqm.LambdaI16;
int tlambda = dqm.TLambda; int tlambda = dqm.TLambda;
Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
Span<int> scratch = it.Scratch3;
var rdTmp = new Vp8ModeScore(); var rdTmp = new Vp8ModeScore();
var res = new Vp8Residual();
Vp8ModeScore rdCur = rdTmp; Vp8ModeScore rdCur = rdTmp;
Vp8ModeScore rdBest = rd; Vp8ModeScore rdBest = rd;
int mode; int mode;
@ -39,7 +58,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rd.ModeI16 = -1; rd.ModeI16 = -1;
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{ {
// scratch buffer. // Scratch buffer.
Span<byte> tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc); Span<byte> tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc);
rdCur.ModeI16 = mode; rdCur.ModeI16 = mode;
@ -47,10 +66,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rdCur.Nz = (uint)ReconstructIntra16(it, dqm, rdCur, tmpDst, mode); rdCur.Nz = (uint)ReconstructIntra16(it, dqm, rdCur, tmpDst, mode);
// Measure RD-score. // Measure RD-score.
rdCur.D = LossyUtils.Vp8Sse16X16(src, tmpDst); rdCur.D = LossyUtils.Vp8_Sse16X16(src, tmpDst);
rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY)) : 0; rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch)) : 0;
rdCur.H = WebpConstants.Vp8FixedCostsI16[mode]; rdCur.H = WebpConstants.Vp8FixedCostsI16[mode];
rdCur.R = it.GetCostLuma16(rdCur, proba); rdCur.R = it.GetCostLuma16(rdCur, proba, res);
if (isFlat) if (isFlat)
{ {
@ -101,6 +120,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int tlambda = dqm.TLambda; int tlambda = dqm.TLambda;
Span<byte> src0 = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); Span<byte> src0 = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
Span<byte> bestBlocks = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc); Span<byte> bestBlocks = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc);
Span<int> scratch = it.Scratch3;
int totalHeaderBits = 0; int totalHeaderBits = 0;
var rdBest = new Vp8ModeScore(); var rdBest = new Vp8ModeScore();
@ -113,31 +133,35 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rdBest.H = 211; // '211' is the value of VP8BitCost(0, 145) rdBest.H = 211; // '211' is the value of VP8BitCost(0, 145)
rdBest.SetRdScore(dqm.LambdaMode); rdBest.SetRdScore(dqm.LambdaMode);
it.StartI4(); it.StartI4();
var rdi4 = new Vp8ModeScore();
var rdTmp = new Vp8ModeScore();
var res = new Vp8Residual();
Span<short> tmpLevels = new short[16];
do do
{ {
int numBlocks = 1; int numBlocks = 1;
var rdi4 = new Vp8ModeScore(); rdi4.Clear();
int mode; int mode;
int bestMode = -1; int bestMode = -1;
Span<byte> src = src0.Slice(WebpLookupTables.Vp8Scan[it.I4]); Span<byte> src = src0.Slice(WebpLookupTables.Vp8Scan[it.I4]);
short[] modeCosts = it.GetCostModeI4(rd.ModesI4); short[] modeCosts = it.GetCostModeI4(rd.ModesI4);
Span<byte> bestBlock = bestBlocks.Slice(WebpLookupTables.Vp8Scan[it.I4]); Span<byte> bestBlock = bestBlocks.Slice(WebpLookupTables.Vp8Scan[it.I4]);
Span<byte> tmpDst = it.Scratch.AsSpan(); Span<byte> tmpDst = it.Scratch.AsSpan();
tmpDst.Fill(0); tmpDst.Clear();
rdi4.InitScore(); rdi4.InitScore();
it.MakeIntra4Preds(); it.MakeIntra4Preds();
for (mode = 0; mode < WebpConstants.NumBModes; ++mode) for (mode = 0; mode < WebpConstants.NumBModes; ++mode)
{ {
var rdTmp = new Vp8ModeScore(); rdTmp.Clear();
short[] tmpLevels = new short[16]; tmpLevels.Clear();
// Reconstruct. // Reconstruct.
rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode); rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode);
// Compute RD-score. // Compute RD-score.
rdTmp.D = LossyUtils.Vp8Sse4X4(src, tmpDst); rdTmp.D = LossyUtils.Vp8_Sse4X4(src, tmpDst);
rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY)) : 0; rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY, scratch)) : 0;
rdTmp.H = modeCosts[mode]; rdTmp.H = modeCosts[mode];
// Add flatness penalty, to avoid flat area to be mispredicted by a complex mode. // Add flatness penalty, to avoid flat area to be mispredicted by a complex mode.
@ -150,15 +174,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rdTmp.R = 0; rdTmp.R = 0;
} }
// early-out check. // Early-out check.
rdTmp.SetRdScore(lambda); rdTmp.SetRdScore(lambda);
if (bestMode >= 0 && rdTmp.Score >= rdi4.Score) if (bestMode >= 0 && rdTmp.Score >= rdi4.Score)
{ {
continue; continue;
} }
// finish computing score. // Finish computing score.
rdTmp.R += it.GetCostLuma4(tmpLevels, proba); rdTmp.R += it.GetCostLuma4(tmpLevels, proba, res);
rdTmp.SetRdScore(lambda); rdTmp.SetRdScore(lambda);
if (bestMode < 0 || rdTmp.Score < rdi4.Score) if (bestMode < 0 || rdTmp.Score < rdi4.Score)
@ -213,22 +237,24 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Span<byte> dst0 = it.YuvOut.AsSpan(Vp8EncIterator.UOffEnc); Span<byte> dst0 = it.YuvOut.AsSpan(Vp8EncIterator.UOffEnc);
Span<byte> dst = dst0; Span<byte> dst = dst0;
var rdBest = new Vp8ModeScore(); var rdBest = new Vp8ModeScore();
var rdUv = new Vp8ModeScore();
var res = new Vp8Residual();
int mode; int mode;
rd.ModeUv = -1; rd.ModeUv = -1;
rdBest.InitScore(); rdBest.InitScore();
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{ {
var rdUv = new Vp8ModeScore(); rdUv.Clear();
// Reconstruct // Reconstruct
rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode); rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode);
// Compute RD-score // Compute RD-score
rdUv.D = LossyUtils.Vp8Sse16X8(src, tmpDst); rdUv.D = LossyUtils.Vp8_Sse16X8(src, tmpDst);
rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas. rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas.
rdUv.H = WebpConstants.Vp8FixedCostsUv[mode]; rdUv.H = WebpConstants.Vp8FixedCostsUv[mode];
rdUv.R = it.GetCostUv(rdUv, proba); rdUv.R = it.GetCostUv(rdUv, proba, res);
if (mode > 0 && IsFlat(rdUv.UvLevels, numBlocks, WebpConstants.FlatnessLimitIUv)) if (mode > 0 && IsFlat(rdUv.UvLevels, numBlocks, WebpConstants.FlatnessLimitIUv))
{ {
rdUv.R += WebpConstants.FlatnessPenality * numBlocks; rdUv.R += WebpConstants.FlatnessPenality * numBlocks;
@ -271,31 +297,39 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
int nz = 0; int nz = 0;
int n; int n;
short[] dcTmp = new short[16]; Span<short> shortScratchSpan = it.Scratch2.AsSpan();
short[] tmp = new short[16 * 16]; Span<int> scratch = it.Scratch3.AsSpan(0, 16);
Span<short> tmpSpan = tmp.AsSpan(); shortScratchSpan.Clear();
scratch.Clear();
Span<short> dcTmp = shortScratchSpan.Slice(0, 16);
Span<short> tmp = shortScratchSpan.Slice(16, 16 * 16);
for (n = 0; n < 16; n += 2) for (n = 0; n < 16; n += 2)
{ {
Vp8Encoding.FTransform2(src.Slice(WebpLookupTables.Vp8Scan[n]), reference.Slice(WebpLookupTables.Vp8Scan[n]), tmpSpan.Slice(n * 16, 16), tmpSpan.Slice((n + 1) * 16, 16)); Vp8Encoding.FTransform2(
src.Slice(WebpLookupTables.Vp8Scan[n]),
reference.Slice(WebpLookupTables.Vp8Scan[n]),
tmp.Slice(n * 16, 16),
tmp.Slice((n + 1) * 16, 16),
scratch);
} }
Vp8Encoding.FTransformWht(tmp, dcTmp); Vp8Encoding.FTransformWht(tmp, dcTmp, scratch);
nz |= QuantizeBlock(dcTmp, rd.YDcLevels, dqm.Y2) << 24; nz |= QuantizeBlock(dcTmp, rd.YDcLevels, ref dqm.Y2) << 24;
for (n = 0; n < 16; n += 2) for (n = 0; n < 16; n += 2)
{ {
// Zero-out the first coeff, so that: a) nz is correct below, and // Zero-out the first coeff, so that: a) nz is correct below, and
// b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
tmp[n * 16] = tmp[(n + 1) * 16] = 0; tmp[n * 16] = tmp[(n + 1) * 16] = 0;
nz |= Quantize2Blocks(tmpSpan.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n; nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), ref dqm.Y1) << n;
} }
// Transform back. // Transform back.
LossyUtils.TransformWht(dcTmp, tmpSpan); LossyUtils.TransformWht(dcTmp, tmp, scratch);
for (n = 0; n < 16; n += 2) for (n = 0; n < 16; n += 2)
{ {
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmpSpan.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), true); Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), scratch);
} }
return nz; return nz;
@ -304,10 +338,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public static int ReconstructIntra4(Vp8EncIterator it, Vp8SegmentInfo dqm, Span<short> levels, Span<byte> src, Span<byte> yuvOut, int mode) public static int ReconstructIntra4(Vp8EncIterator it, Vp8SegmentInfo dqm, Span<short> levels, Span<byte> src, Span<byte> yuvOut, int mode)
{ {
Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]); Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]);
short[] tmp = new short[16]; Span<short> tmp = it.Scratch2.AsSpan(0, 16);
Vp8Encoding.FTransform(src, reference, tmp); Span<int> scratch = it.Scratch3.AsSpan(0, 16);
int nz = QuantizeBlock(tmp, levels, dqm.Y1); Vp8Encoding.FTransform(src, reference, tmp, scratch);
Vp8Encoding.ITransform(reference, tmp, yuvOut, false); int nz = QuantizeBlock(tmp, levels, ref dqm.Y1);
Vp8Encoding.ITransformOne(reference, tmp, yuvOut, scratch);
return nz; return nz;
} }
@ -318,27 +353,29 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc); Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc);
int nz = 0; int nz = 0;
int n; int n;
short[] tmp = new short[8 * 16]; Span<short> tmp = it.Scratch2.AsSpan(0, 8 * 16);
Span<int> scratch = it.Scratch3.AsSpan(0, 16);
for (n = 0; n < 8; n += 2) for (n = 0; n < 8; n += 2)
{ {
Vp8Encoding.FTransform2( Vp8Encoding.FTransform2(
src.Slice(WebpLookupTables.Vp8ScanUv[n]), src.Slice(WebpLookupTables.Vp8ScanUv[n]),
reference.Slice(WebpLookupTables.Vp8ScanUv[n]), reference.Slice(WebpLookupTables.Vp8ScanUv[n]),
tmp.AsSpan(n * 16, 16), tmp.Slice(n * 16, 16),
tmp.AsSpan((n + 1) * 16, 16)); tmp.Slice((n + 1) * 16, 16),
scratch);
} }
CorrectDcValues(it, dqm.Uv, tmp, rd); CorrectDcValues(it, ref dqm.Uv, tmp, rd);
for (n = 0; n < 8; n += 2) for (n = 0; n < 8; n += 2)
{ {
nz |= Quantize2Blocks(tmp.AsSpan(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n; nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), ref dqm.Uv) << n;
} }
for (n = 0; n < 8; n += 2) for (n = 0; n < 8; n += 2)
{ {
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.AsSpan(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), true); Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), scratch);
} }
return nz << 16; return nz << 16;
@ -370,7 +407,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{ {
Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]); Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]);
long score = (LossyUtils.Vp8Sse16X16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16); long score = (LossyUtils.Vp8_Sse16X16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16);
if (mode > 0 && WebpConstants.Vp8FixedCostsI16[mode] > bitLimit) if (mode > 0 && WebpConstants.Vp8FixedCostsI16[mode] > bitLimit)
{ {
@ -417,7 +454,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
for (mode = 0; mode < WebpConstants.NumBModes; ++mode) for (mode = 0; mode < WebpConstants.NumBModes; ++mode)
{ {
Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]); Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]);
long score = (LossyUtils.Vp8Sse4X4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4); long score = (LossyUtils.Vp8_Sse4X4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4);
if (score < bestI4Score) if (score < bestI4Score)
{ {
bestI4Mode = mode; bestI4Mode = mode;
@ -466,7 +503,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{ {
Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8UvModeOffsets[mode]); Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8UvModeOffsets[mode]);
long score = (LossyUtils.Vp8Sse16X8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv); long score = (LossyUtils.Vp8_Sse16X8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv);
if (score < bestUvScore) if (score < bestUvScore)
{ {
bestMode = mode; bestMode = mode;
@ -484,58 +521,155 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static int Quantize2Blocks(Span<short> input, Span<short> output, Vp8Matrix mtx) public static int Quantize2Blocks(Span<short> input, Span<short> output, ref Vp8Matrix mtx)
{ {
int nz = QuantizeBlock(input, output, mtx) << 0; int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), ref mtx) << 0;
nz |= QuantizeBlock(input.Slice(1 * 16), output.Slice(1 * 16), mtx) << 1; nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), ref mtx) << 1;
return nz; return nz;
} }
public static int QuantizeBlock(Span<short> input, Span<short> output, Vp8Matrix mtx) public static int QuantizeBlock(Span<short> input, Span<short> output, ref Vp8Matrix mtx)
{ {
int last = -1; #if SUPPORTS_RUNTIME_INTRINSICS
int n; if (Sse41.IsSupported)
for (n = 0; n < 16; ++n) {
// Load all inputs.
Vector128<short> input0 = Unsafe.As<short, Vector128<short>>(ref MemoryMarshal.GetReference(input));
Vector128<short> input8 = Unsafe.As<short, Vector128<short>>(ref MemoryMarshal.GetReference(input.Slice(8, 8)));
Vector128<ushort> iq0 = Unsafe.As<ushort, Vector128<ushort>>(ref mtx.IQ[0]);
Vector128<ushort> iq8 = Unsafe.As<ushort, Vector128<ushort>>(ref mtx.IQ[8]);
Vector128<ushort> q0 = Unsafe.As<ushort, Vector128<ushort>>(ref mtx.Q[0]);
Vector128<ushort> q8 = Unsafe.As<ushort, Vector128<ushort>>(ref mtx.Q[8]);
// coeff = abs(in)
Vector128<ushort> coeff0 = Ssse3.Abs(input0);
Vector128<ushort> coeff8 = Ssse3.Abs(input8);
// coeff = abs(in) + sharpen
Vector128<short> sharpen0 = Unsafe.As<short, Vector128<short>>(ref mtx.Sharpen[0]);
Vector128<short> sharpen8 = Unsafe.As<short, Vector128<short>>(ref mtx.Sharpen[8]);
Sse2.Add(coeff0.AsInt16(), sharpen0);
Sse2.Add(coeff8.AsInt16(), sharpen8);
// out = (coeff * iQ + B) >> QFIX
// doing calculations with 32b precision (QFIX=17)
// out = (coeff * iQ)
Vector128<ushort> coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0);
Vector128<ushort> coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0);
Vector128<ushort> coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8);
Vector128<ushort> coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8);
Vector128<ushort> out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H);
Vector128<ushort> out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H);
Vector128<ushort> out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H);
Vector128<ushort> out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H);
// out = (coeff * iQ + B)
Vector128<uint> bias00 = Unsafe.As<uint, Vector128<uint>>(ref mtx.Bias[0]);
Vector128<uint> bias04 = Unsafe.As<uint, Vector128<uint>>(ref mtx.Bias[4]);
Vector128<uint> bias08 = Unsafe.As<uint, Vector128<uint>>(ref mtx.Bias[8]);
Vector128<uint> bias12 = Unsafe.As<uint, Vector128<uint>>(ref mtx.Bias[12]);
out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16();
out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16();
out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16();
out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16();
// out = QUANTDIV(coeff, iQ, B, QFIX)
out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16();
out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16();
out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16();
out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16();
// pack result as 16b
Vector128<short> out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32());
Vector128<short> out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32());
// if (coeff > 2047) coeff = 2047
out0 = Sse2.Min(out0, MaxCoeff2047);
out8 = Sse2.Min(out8, MaxCoeff2047);
// put sign back
out0 = Ssse3.Sign(out0, input0);
out8 = Ssse3.Sign(out8, input8);
// in = out * Q
input0 = Sse2.MultiplyLow(out0, q0.AsInt16());
input8 = Sse2.MultiplyLow(out8, q8.AsInt16());
// Store the dequantized coefficients (out * Q) back into 'input'.
ref short inputRef = ref MemoryMarshal.GetReference(input);
Unsafe.As<short, Vector128<short>>(ref inputRef) = input0;
Unsafe.As<short, Vector128<short>>(ref Unsafe.Add(ref inputRef, 8)) = input8;
// zigzag the output before storing it. The re-ordering is:
// 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15
// -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15
// There are only two misplaced entries ([8] and [7]) that cross the
// registers' boundaries.
// We use pshufb instead of pshuflo/pshufhi.
Vector128<byte> tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo);
Vector128<byte> tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7
Vector128<byte> tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi);
Vector128<byte> tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8
Vector128<byte> outZ0 = Sse2.Or(tmpLo, tmp8);
Vector128<byte> outZ8 = Sse2.Or(tmpHi, tmp7);
ref short outputRef = ref MemoryMarshal.GetReference(output);
Unsafe.As<short, Vector128<short>>(ref outputRef) = outZ0.AsInt16();
Unsafe.As<short, Vector128<short>>(ref Unsafe.Add(ref outputRef, 8)) = outZ8.AsInt16();
Vector128<sbyte> packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16());
// Detect if all 'out' values are zeros or not.
Vector128<sbyte> cmpeq = Sse2.CompareEqual(packedOutput, Vector128<sbyte>.Zero);
return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0;
}
else
#endif
{ {
int j = Zigzag[n]; int last = -1;
bool sign = input[j] < 0; int n;
uint coeff = (uint)((sign ? -input[j] : input[j]) + mtx.Sharpen[j]); for (n = 0; n < 16; ++n)
if (coeff > mtx.ZThresh[j])
{ {
uint q = mtx.Q[j]; int j = Zigzag[n];
uint iQ = mtx.IQ[j]; bool sign = input[j] < 0;
uint b = mtx.Bias[j]; uint coeff = (uint)((sign ? -input[j] : input[j]) + mtx.Sharpen[j]);
int level = QuantDiv(coeff, iQ, b); if (coeff > mtx.ZThresh[j])
if (level > MaxLevel)
{ {
level = MaxLevel; uint q = mtx.Q[j];
} uint iQ = mtx.IQ[j];
uint b = mtx.Bias[j];
int level = QuantDiv(coeff, iQ, b);
if (level > MaxLevel)
{
level = MaxLevel;
}
if (sign) if (sign)
{ {
level = -level; level = -level;
} }
input[j] = (short)(level * (int)q); input[j] = (short)(level * (int)q);
output[n] = (short)level; output[n] = (short)level;
if (level != 0) if (level != 0)
{
last = n;
}
}
else
{ {
last = n; output[n] = 0;
input[j] = 0;
} }
} }
else
{
output[n] = 0;
input[j] = 0;
}
}
return last >= 0 ? 1 : 0; return last >= 0 ? 1 : 0;
}
} }
// Quantize as usual, but also compute and return the quantization error. // Quantize as usual, but also compute and return the quantization error.
// Error is already divided by DSHIFT. // Error is already divided by DSHIFT.
public static int QuantizeSingle(Span<short> v, Vp8Matrix mtx) public static int QuantizeSingle(Span<short> v, ref Vp8Matrix mtx)
{ {
int v0 = v[0]; int v0 = v[0];
bool sign = v0 < 0; bool sign = v0 < 0;
@ -556,7 +690,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return (sign ? -v0 : v0) >> DSCALE; return (sign ? -v0 : v0) >> DSCALE;
} }
public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, short[] tmp, Vp8ModeScore rd) public static void CorrectDcValues(Vp8EncIterator it, ref Vp8Matrix mtx, Span<short> tmp, Vp8ModeScore rd)
{ {
#pragma warning disable SA1005 // Single line comments should begin with single space #pragma warning disable SA1005 // Single line comments should begin with single space
// | top[0] | top[1] // | top[0] | top[1]
@ -571,15 +705,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
Span<sbyte> top = it.TopDerr.AsSpan((it.X * 4) + ch, 2); Span<sbyte> top = it.TopDerr.AsSpan((it.X * 4) + ch, 2);
Span<sbyte> left = it.LeftDerr.AsSpan(ch, 2); Span<sbyte> left = it.LeftDerr.AsSpan(ch, 2);
Span<short> c = tmp.AsSpan(ch * 4 * 16, 4 * 16); Span<short> c = tmp.Slice(ch * 4 * 16, 4 * 16);
c[0] += (short)(((C1 * top[0]) + (C2 * left[0])) >> (DSHIFT - DSCALE)); c[0] += (short)(((C1 * top[0]) + (C2 * left[0])) >> (DSHIFT - DSCALE));
int err0 = QuantizeSingle(c, mtx); int err0 = QuantizeSingle(c, ref mtx);
c[1 * 16] += (short)(((C1 * top[1]) + (C2 * err0)) >> (DSHIFT - DSCALE)); c[1 * 16] += (short)(((C1 * top[1]) + (C2 * err0)) >> (DSHIFT - DSCALE));
int err1 = QuantizeSingle(c.Slice(1 * 16), mtx); int err1 = QuantizeSingle(c.Slice(1 * 16), ref mtx);
c[2 * 16] += (short)(((C1 * err0) + (C2 * left[1])) >> (DSHIFT - DSCALE)); c[2 * 16] += (short)(((C1 * err0) + (C2 * left[1])) >> (DSHIFT - DSCALE));
int err2 = QuantizeSingle(c.Slice(2 * 16), mtx); int err2 = QuantizeSingle(c.Slice(2 * 16), ref mtx);
c[3 * 16] += (short)(((C1 * err1) + (C2 * err2)) >> (DSHIFT - DSCALE)); c[3 * 16] += (short)(((C1 * err1) + (C2 * err2)) >> (DSHIFT - DSCALE));
int err3 = QuantizeSingle(c.Slice(3 * 16), mtx); int err3 = QuantizeSingle(c.Slice(3 * 16), ref mtx);
rd.Derr[ch, 0] = err1; rd.Derr[ch, 0] = err1;
rd.Derr[ch, 1] = err2; rd.Derr[ch, 1] = err2;

60
src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs

@ -81,6 +81,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.I4Boundary = new byte[37]; this.I4Boundary = new byte[37];
this.BitCount = new long[4, 3]; this.BitCount = new long[4, 3];
this.Scratch = new byte[WebpConstants.Bps * 16]; this.Scratch = new byte[WebpConstants.Bps * 16];
this.Scratch2 = new short[17 * 16];
this.Scratch3 = new int[16];
// To match the C initial values of the reference implementation, initialize all with 204. // To match the C initial values of the reference implementation, initialize all with 204.
byte defaultInitVal = 204; byte defaultInitVal = 204;
@ -216,10 +218,20 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public int CountDown { get; set; } public int CountDown { get; set; }
/// <summary> /// <summary>
/// Gets the scratch buffer. /// Gets the byte scratch buffer.
/// </summary> /// </summary>
public byte[] Scratch { get; } public byte[] Scratch { get; }
/// <summary>
/// Gets the short scratch buffer.
/// </summary>
public short[] Scratch2 { get; }
/// <summary>
/// Gets the int scratch buffer.
/// </summary>
public int[] Scratch3 { get; }
public Vp8MacroBlockInfo CurrentMacroBlockInfo => this.Mb[this.currentMbIdx]; public Vp8MacroBlockInfo CurrentMacroBlockInfo => this.Mb[this.currentMbIdx];
private Vp8MacroBlockInfo[] Mb { get; } private Vp8MacroBlockInfo[] Mb { get; }
@ -345,15 +357,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int q = quality; int q = quality;
int kThreshold = 8 + ((17 - 8) * q / 100); int kThreshold = 8 + ((17 - 8) * q / 100);
int k; int k;
uint[] dc = new uint[16]; Span<uint> dc = stackalloc uint[16];
Span<ushort> tmp = stackalloc ushort[16];
uint m; uint m;
uint m2; uint m2;
for (k = 0; k < 16; k += 4) for (k = 0; k < 16; k += 4)
{ {
this.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.AsSpan(k)); LossyUtils.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.Slice(k, 4));
} }
for (m = 0, m2 = 0, k = 0; k < 16; ++k) for (m = 0, m2 = 0, k = 0; k < 16; k++)
{ {
m += dc[k]; m += dc[k];
m2 += dc[k] * dc[k]; m2 += dc[k] * dc[k];
@ -380,7 +393,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int bestMode = 0; int bestMode = 0;
this.MakeLuma16Preds(); this.MakeLuma16Preds();
for (mode = 0; mode < maxMode; ++mode) for (mode = 0; mode < maxMode; mode++)
{ {
var histo = new Vp8Histogram(); var histo = new Vp8Histogram();
histo.CollectHistogram(this.YuvIn.AsSpan(YOffEnc), this.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]), 0, 16); histo.CollectHistogram(this.YuvIn.AsSpan(YOffEnc), this.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]), 0, 16);
@ -499,9 +512,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.CurrentMacroBlockInfo.MacroBlockType = Vp8MacroBlockType.I4X4; this.CurrentMacroBlockInfo.MacroBlockType = Vp8MacroBlockType.I4X4;
} }
public int GetCostLuma16(Vp8ModeScore rd, Vp8EncProba proba) public int GetCostLuma16(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res)
{ {
var res = new Vp8Residual();
int r = 0; int r = 0;
// re-import the non-zero context. // re-import the non-zero context.
@ -539,11 +551,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return WebpLookupTables.Vp8FixedCostsI4[top, left]; return WebpLookupTables.Vp8FixedCostsI4[top, left];
} }
public int GetCostLuma4(short[] levels, Vp8EncProba proba) public int GetCostLuma4(Span<short> levels, Vp8EncProba proba, Vp8Residual res)
{ {
int x = this.I4 & 3; int x = this.I4 & 3;
int y = this.I4 >> 2; int y = this.I4 >> 2;
var res = new Vp8Residual();
int r = 0; int r = 0;
res.Init(0, 3, proba); res.Init(0, 3, proba);
@ -553,9 +564,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return r; return r;
} }
public int GetCostUv(Vp8ModeScore rd, Vp8EncProba proba) public int GetCostUv(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res)
{ {
var res = new Vp8Residual();
int r = 0; int r = 0;
// re-import the non-zero context. // re-import the non-zero context.
@ -741,7 +751,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Vp8Encoding.EncPredChroma8(this.YuvP, left, top); Vp8Encoding.EncPredChroma8(this.YuvP, left, top);
} }
public void MakeIntra4Preds() => Vp8Encoding.EncPredLuma4(this.YuvP, this.I4Boundary, this.I4BoundaryIdx); public void MakeIntra4Preds() => Vp8Encoding.EncPredLuma4(this.YuvP, this.I4Boundary, this.I4BoundaryIdx, this.Scratch.AsSpan(0, 4));
public void SwapOut() public void SwapOut()
{ {
@ -814,24 +824,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.Nz[this.nzIdx] = nz; this.Nz[this.nzIdx] = nz;
} }
private void Mean16x4(Span<byte> input, Span<uint> dc)
{
for (int k = 0; k < 4; k++)
{
uint avg = 0;
for (int y = 0; y < 4; y++)
{
for (int x = 0; x < 4; x++)
{
avg += input[x + (y * WebpConstants.Bps)];
}
}
dc[k] = avg;
input = input.Slice(4); // go to next 4x4 block.
}
}
private void ImportBlock(Span<byte> src, int srcStride, Span<byte> dst, int w, int h, int size) private void ImportBlock(Span<byte> src, int srcStride, Span<byte> dst, int w, int h, int size)
{ {
int dstIdx = 0; int dstIdx = 0;
@ -919,7 +911,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.LeftNz[8] = 0; this.LeftNz[8] = 0;
this.LeftDerr.AsSpan().Fill(0); this.LeftDerr.AsSpan().Clear();
} }
private void InitTop() private void InitTop()
@ -927,14 +919,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int topSize = this.mbw * 16; int topSize = this.mbw * 16;
this.YTop.AsSpan(0, topSize).Fill(127); this.YTop.AsSpan(0, topSize).Fill(127);
this.UvTop.AsSpan().Fill(127); this.UvTop.AsSpan().Fill(127);
this.Nz.AsSpan().Fill(0); this.Nz.AsSpan().Clear();
int predsW = (4 * this.mbw) + 1; int predsW = (4 * this.mbw) + 1;
int predsH = (4 * this.mbh) + 1; int predsH = (4 * this.mbh) + 1;
int predsSize = predsW * predsH; int predsSize = predsW * predsH;
this.Preds.AsSpan(predsSize + this.predsWidth, this.mbw).Fill(0); this.Preds.AsSpan(predsSize + this.predsWidth, this.mbw).Clear();
this.TopDerr.AsSpan().Fill(0); this.TopDerr.AsSpan().Clear();
} }
private int Bit(uint nz, int n) => (nz & (1 << n)) != 0 ? 1 : 0; private int Bit(uint nz, int n) => (nz & (1 << n)) != 0 ? 1 : 0;

32
src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs

@ -70,6 +70,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// </summary> /// </summary>
private int uvAlpha; private int uvAlpha;
/// <summary>
/// Scratch buffer to reduce allocations.
/// </summary>
private readonly int[] scratch = new int[16];
private readonly byte[] averageBytesPerMb = { 50, 24, 16, 9, 7, 5, 3, 2 }; private readonly byte[] averageBytesPerMb = { 50, 24, 16, 9, 7, 5, 3, 2 };
private const int NumMbSegments = 4; private const int NumMbSegments = 4;
@ -317,22 +322,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.bitWriter = new Vp8BitWriter(expectedSize, this); this.bitWriter = new Vp8BitWriter(expectedSize, this);
// TODO: EncodeAlpha(); // TODO: EncodeAlpha();
bool hasAlpha = false;
// Stats-collection loop. // Stats-collection loop.
this.StatLoop(width, height, yStride, uvStride); this.StatLoop(width, height, yStride, uvStride);
it.Init(); it.Init();
it.InitFilter(); it.InitFilter();
var info = new Vp8ModeScore();
var residual = new Vp8Residual();
do do
{ {
bool dontUseSkip = !this.Proba.UseSkipProba; bool dontUseSkip = !this.Proba.UseSkipProba;
info.Clear();
var info = new Vp8ModeScore();
it.Import(y, u, v, yStride, uvStride, width, height, false); it.Import(y, u, v, yStride, uvStride, width, height, false);
// Warning! order is important: first call VP8Decimate() and // Warning! order is important: first call VP8Decimate() and
// *then* decide how to code the skip decision if there's one. // *then* decide how to code the skip decision if there's one.
if (!this.Decimate(it, ref info, this.rdOptLevel) || dontUseSkip) if (!this.Decimate(it, ref info, this.rdOptLevel) || dontUseSkip)
{ {
this.CodeResiduals(it, info); this.CodeResiduals(it, info, residual);
} }
else else
{ {
@ -348,7 +356,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// Write bytes from the bitwriter buffer to the stream. // Write bytes from the bitwriter buffer to the stream.
image.Metadata.SyncProfiles(); image.Metadata.SyncProfiles();
this.bitWriter.WriteEncodedImageToStream(stream, image.Metadata.ExifProfile, (uint)width, (uint)height); this.bitWriter.WriteEncodedImageToStream(stream, image.Metadata.ExifProfile, (uint)width, (uint)height, hasAlpha);
} }
/// <inheritdoc/> /// <inheritdoc/>
@ -447,9 +455,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
it.Init(); it.Init();
this.SetLoopParams(stats.Q); this.SetLoopParams(stats.Q);
var info = new Vp8ModeScore();
do do
{ {
var info = new Vp8ModeScore(); info.Clear();
it.Import(y, u, v, yStride, uvStride, width, height, false); it.Import(y, u, v, yStride, uvStride, width, height, false);
if (this.Decimate(it, ref info, rdOpt)) if (this.Decimate(it, ref info, rdOpt))
{ {
@ -493,7 +502,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.ResetStats(); this.ResetStats();
} }
private void AdjustFilterStrength() private unsafe void AdjustFilterStrength()
{ {
if (this.filterStrength > 0) if (this.filterStrength > 0)
{ {
@ -537,7 +546,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int predsW = (4 * this.Mbw) + 1; int predsW = (4 * this.Mbw) + 1;
int predsH = (4 * this.Mbh) + 1; int predsH = (4 * this.Mbh) + 1;
int predsSize = predsW * predsH; int predsSize = predsW * predsH;
this.Preds.AsSpan(predsSize + this.PredsWidth - 4, 4).Fill(0); this.Preds.AsSpan(predsSize + this.PredsWidth - 4, 4).Clear();
this.Nz[0] = 0; // constant this.Nz[0] = 0; // constant
} }
@ -797,7 +806,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
proba.NbSkip = 0; proba.NbSkip = 0;
} }
private void SetupMatrices(Vp8SegmentInfo[] dqm) private unsafe void SetupMatrices(Vp8SegmentInfo[] dqm)
{ {
int tlambdaScale = this.method >= WebpEncodingMethod.Default ? this.spatialNoiseShaping : 0; int tlambdaScale = this.method >= WebpEncodingMethod.Default ? this.spatialNoiseShaping : 0;
for (int i = 0; i < dqm.Length; i++) for (int i = 0; i < dqm.Length; i++)
@ -805,10 +814,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Vp8SegmentInfo m = dqm[i]; Vp8SegmentInfo m = dqm[i];
int q = m.Quant; int q = m.Quant;
m.Y1 = new Vp8Matrix();
m.Y2 = new Vp8Matrix();
m.Uv = new Vp8Matrix();
m.Y1.Q[0] = WebpLookupTables.DcTable[Numerics.Clamp(q + this.DqY1Dc, 0, 127)]; m.Y1.Q[0] = WebpLookupTables.DcTable[Numerics.Clamp(q + this.DqY1Dc, 0, 127)];
m.Y1.Q[1] = WebpLookupTables.AcTable[Numerics.Clamp(q, 0, 127)]; m.Y1.Q[1] = WebpLookupTables.AcTable[Numerics.Clamp(q, 0, 127)];
@ -930,10 +935,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return isSkipped; return isSkipped;
} }
private void CodeResiduals(Vp8EncIterator it, Vp8ModeScore rd) private void CodeResiduals(Vp8EncIterator it, Vp8ModeScore rd, Vp8Residual residual)
{ {
int x, y, ch; int x, y, ch;
var residual = new Vp8Residual();
bool i16 = it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16; bool i16 = it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16;
int segment = it.CurrentMacroBlockInfo.Segment; int segment = it.CurrentMacroBlockInfo.Segment;

366
src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs

@ -4,6 +4,11 @@
using System; using System;
using System.Buffers.Binary; using System.Buffers.Binary;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Webp.Lossy namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
@ -60,6 +65,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public static readonly int[] Vp8I4ModeOffsets = { I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4 }; public static readonly int[] Vp8I4ModeOffsets = { I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4 };
#if SUPPORTS_RUNTIME_INTRINSICS
public static readonly Vector128<short> K1 = Vector128.Create((short)20091).AsInt16();
public static readonly Vector128<short> K2 = Vector128.Create((short)-30068).AsInt16();
public static readonly Vector128<short> Four = Vector128.Create((short)4);
#endif
static Vp8Encoding() static Vp8Encoding()
{ {
for (int i = -255; i <= 255 + 255; i++) for (int i = -255; i <= 255 + 255; i++)
@ -68,64 +81,310 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
public static void ITransform(Span<byte> reference, Span<short> input, Span<byte> dst, bool doTwo) // Transforms (Paragraph 14.4)
// Does two inverse transforms.
public static void ITransform(Span<byte> reference, Span<short> input, Span<byte> dst, Span<int> scratch)
{ {
ITransformOne(reference, input, dst); #if SUPPORTS_RUNTIME_INTRINSICS
if (doTwo) if (Sse2.IsSupported)
{ {
ITransformOne(reference.Slice(4), input.Slice(16), dst.Slice(4)); // This implementation makes use of 16-bit fixed point versions of two
// multiply constants:
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
// K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16
//
// To be able to use signed 16-bit integers, we use the following trick to
// have constants within range:
// - Associated constants are obtained by subtracting the 16-bit fixed point
// version of one:
// k = K - (1 << 16) => K = k + (1 << 16)
// K1 = 85627 => k1 = 20091
// K2 = 35468 => k2 = -30068
// - The multiplication of a variable by a constant become the sum of the
// variable and the multiplication of that variable by the associated
// constant:
// (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
// Load and concatenate the transform coefficients (we'll do two inverse
// transforms in parallel). In the case of only one inverse transform, the
// second half of the vectors will just contain random values that we'll
// never use or store.
ref short inputRef = ref MemoryMarshal.GetReference(input);
var in0 = Vector128.Create(Unsafe.As<short, long>(ref inputRef), 0);
var in1 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 4)), 0);
var in2 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 8)), 0);
var in3 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 12)), 0);
// a00 a10 a20 a30 x x x x
// a01 a11 a21 a31 x x x x
// a02 a12 a22 a32 x x x x
// a03 a13 a23 a33 x x x x
var inb0 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 16)), 0);
var inb1 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 20)), 0);
var inb2 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 24)), 0);
var inb3 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 28)), 0);
in0 = Sse2.UnpackLow(in0, inb0);
in1 = Sse2.UnpackLow(in1, inb1);
in2 = Sse2.UnpackLow(in2, inb2);
in3 = Sse2.UnpackLow(in3, inb3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
// Vertical pass and subsequent transpose.
// First pass, c and d calculations are longer because of the "trick" multiplications.
InverseTransformVerticalPass(in0, in2, in1, in3, out Vector128<short> tmp0, out Vector128<short> tmp1, out Vector128<short> tmp2, out Vector128<short> tmp3);
// Transpose the two 4x4.
LossyUtils.Vp8Transpose_2_4x4_16b(tmp0, tmp1, tmp2, tmp3, out Vector128<long> t0, out Vector128<long> t1, out Vector128<long> t2, out Vector128<long> t3);
// Horizontal pass and subsequent transpose.
// First pass, c and d calculations are longer because of the "trick" multiplications.
InverseTransformHorizontalPass(t0, t2, t1, t3, out Vector128<short> shifted0, out Vector128<short> shifted1, out Vector128<short> shifted2, out Vector128<short> shifted3);
// Transpose the two 4x4.
LossyUtils.Vp8Transpose_2_4x4_16b(shifted0, shifted1, shifted2, shifted3, out t0, out t1, out t2, out t3);
// Add inverse transform to 'ref' and store.
// Load the reference(s).
Vector128<byte> ref0 = Vector128<byte>.Zero;
Vector128<byte> ref1 = Vector128<byte>.Zero;
Vector128<byte> ref2 = Vector128<byte>.Zero;
Vector128<byte> ref3 = Vector128<byte>.Zero;
ref byte referenceRef = ref MemoryMarshal.GetReference(reference);
// Load eight bytes/pixels per line.
ref0 = Vector128.Create(Unsafe.As<byte, long>(ref referenceRef), 0).AsByte();
ref1 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps)), 0).AsByte();
ref2 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 2)), 0).AsByte();
ref3 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 3)), 0).AsByte();
// Convert to 16b.
ref0 = Sse2.UnpackLow(ref0, Vector128<byte>.Zero);
ref1 = Sse2.UnpackLow(ref1, Vector128<byte>.Zero);
ref2 = Sse2.UnpackLow(ref2, Vector128<byte>.Zero);
ref3 = Sse2.UnpackLow(ref3, Vector128<byte>.Zero);
// Add the inverse transform(s).
Vector128<short> ref0InvAdded = Sse2.Add(ref0.AsInt16(), t0.AsInt16());
Vector128<short> ref1InvAdded = Sse2.Add(ref1.AsInt16(), t1.AsInt16());
Vector128<short> ref2InvAdded = Sse2.Add(ref2.AsInt16(), t2.AsInt16());
Vector128<short> ref3InvAdded = Sse2.Add(ref3.AsInt16(), t3.AsInt16());
// Unsigned saturate to 8b.
ref0 = Sse2.PackUnsignedSaturate(ref0InvAdded, ref0InvAdded);
ref1 = Sse2.PackUnsignedSaturate(ref1InvAdded, ref1InvAdded);
ref2 = Sse2.PackUnsignedSaturate(ref2InvAdded, ref2InvAdded);
ref3 = Sse2.PackUnsignedSaturate(ref3InvAdded, ref3InvAdded);
// Get a reference to the destination for the stores below.
ref byte outputRef = ref MemoryMarshal.GetReference(dst);
// Store eight bytes/pixels per line.
Unsafe.As<byte, Vector64<byte>>(ref outputRef) = ref0.GetLower();
Unsafe.As<byte, Vector64<byte>>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps)) = ref1.GetLower();
Unsafe.As<byte, Vector64<byte>>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 2)) = ref2.GetLower();
Unsafe.As<byte, Vector64<byte>>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 3)) = ref3.GetLower();
}
else
#endif
{
ITransformOne(reference, input, dst, scratch);
ITransformOne(reference.Slice(4), input.Slice(16), dst.Slice(4), scratch);
} }
} }
public static void ITransformOne(Span<byte> reference, Span<short> input, Span<byte> dst) public static void ITransformOne(Span<byte> reference, Span<short> input, Span<byte> dst, Span<int> scratch)
{ {
int i; #if SUPPORTS_RUNTIME_INTRINSICS
#pragma warning disable SA1312 // Variable names should begin with lower-case letter if (Sse2.IsSupported)
int[] C = new int[4 * 4];
#pragma warning restore SA1312 // Variable names should begin with lower-case letter
Span<int> tmp = C.AsSpan();
for (i = 0; i < 4; i++)
{ {
// vertical pass. // Load and concatenate the transform coefficients (we'll do two inverse
int a = input[0] + input[8]; // transforms in parallel). In the case of only one inverse transform, the
int b = input[0] - input[8]; // second half of the vectors will just contain random value we'll never
int c = Mul(input[4], KC2) - Mul(input[12], KC1); // use nor store.
int d = Mul(input[4], KC1) + Mul(input[12], KC2); ref short inputRef = ref MemoryMarshal.GetReference(input);
tmp[0] = a + d; var in0 = Vector128.Create(Unsafe.As<short, long>(ref inputRef), 0);
tmp[1] = b + c; var in1 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 4)), 0);
tmp[2] = b - c; var in2 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 8)), 0);
tmp[3] = a - d; var in3 = Vector128.Create(Unsafe.As<short, long>(ref Unsafe.Add(ref inputRef, 12)), 0);
tmp = tmp.Slice(4);
input = input.Slice(1); // a00 a10 a20 a30 x x x x
// a01 a11 a21 a31 x x x x
// a02 a12 a22 a32 x x x x
// a03 a13 a23 a33 x x x x
// Vertical pass and subsequent transpose.
// First pass, c and d calculations are longer because of the "trick" multiplications.
InverseTransformVerticalPass(in0, in2, in1, in3, out Vector128<short> tmp0, out Vector128<short> tmp1, out Vector128<short> tmp2, out Vector128<short> tmp3);
// Transpose the two 4x4.
LossyUtils.Vp8Transpose_2_4x4_16b(tmp0, tmp1, tmp2, tmp3, out Vector128<long> t0, out Vector128<long> t1, out Vector128<long> t2, out Vector128<long> t3);
// Horizontal pass and subsequent transpose.
// First pass, c and d calculations are longer because of the "trick" multiplications.
InverseTransformHorizontalPass(t0, t2, t1, t3, out Vector128<short> shifted0, out Vector128<short> shifted1, out Vector128<short> shifted2, out Vector128<short> shifted3);
// Transpose the two 4x4.
LossyUtils.Vp8Transpose_2_4x4_16b(shifted0, shifted1, shifted2, shifted3, out t0, out t1, out t2, out t3);
// Add inverse transform to 'ref' and store.
// Load the reference(s).
Vector128<byte> ref0 = Vector128<byte>.Zero;
Vector128<byte> ref1 = Vector128<byte>.Zero;
Vector128<byte> ref2 = Vector128<byte>.Zero;
Vector128<byte> ref3 = Vector128<byte>.Zero;
ref byte referenceRef = ref MemoryMarshal.GetReference(reference);
// Load four bytes/pixels per line.
ref0 = Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref referenceRef)).AsByte();
ref1 = Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps))).AsByte();
ref2 = Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 2))).AsByte();
ref3 = Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 3))).AsByte();
// Convert to 16b.
ref0 = Sse2.UnpackLow(ref0, Vector128<byte>.Zero);
ref1 = Sse2.UnpackLow(ref1, Vector128<byte>.Zero);
ref2 = Sse2.UnpackLow(ref2, Vector128<byte>.Zero);
ref3 = Sse2.UnpackLow(ref3, Vector128<byte>.Zero);
// Add the inverse transform(s).
Vector128<short> ref0InvAdded = Sse2.Add(ref0.AsInt16(), t0.AsInt16());
Vector128<short> ref1InvAdded = Sse2.Add(ref1.AsInt16(), t1.AsInt16());
Vector128<short> ref2InvAdded = Sse2.Add(ref2.AsInt16(), t2.AsInt16());
Vector128<short> ref3InvAdded = Sse2.Add(ref3.AsInt16(), t3.AsInt16());
// Unsigned saturate to 8b.
ref0 = Sse2.PackUnsignedSaturate(ref0InvAdded, ref0InvAdded);
ref1 = Sse2.PackUnsignedSaturate(ref1InvAdded, ref1InvAdded);
ref2 = Sse2.PackUnsignedSaturate(ref2InvAdded, ref2InvAdded);
ref3 = Sse2.PackUnsignedSaturate(ref3InvAdded, ref3InvAdded);
// Get a reference to the destination for the stores below.
ref byte outputRef = ref MemoryMarshal.GetReference(dst);
// Store four bytes/pixels per line.
int output0 = Sse2.ConvertToInt32(ref0.AsInt32());
int output1 = Sse2.ConvertToInt32(ref1.AsInt32());
int output2 = Sse2.ConvertToInt32(ref2.AsInt32());
int output3 = Sse2.ConvertToInt32(ref3.AsInt32());
Unsafe.As<byte, int>(ref outputRef) = output0;
Unsafe.As<byte, int>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps)) = output1;
Unsafe.As<byte, int>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 2)) = output2;
Unsafe.As<byte, int>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 3)) = output3;
} }
else
tmp = C.AsSpan(); #endif
for (i = 0; i < 4; i++)
{ {
// horizontal pass. int i;
int dc = tmp[0] + 4; Span<int> tmp = scratch.Slice(0, 16);
int a = dc + tmp[8]; for (i = 0; i < 4; i++)
int b = dc - tmp[8]; {
int c = Mul(tmp[4], KC2) - Mul(tmp[12], KC1); // vertical pass.
int d = Mul(tmp[4], KC1) + Mul(tmp[12], KC2); int a = input[0] + input[8];
Store(dst, reference, 0, i, a + d); int b = input[0] - input[8];
Store(dst, reference, 1, i, b + c); int c = Mul(input[4], KC2) - Mul(input[12], KC1);
Store(dst, reference, 2, i, b - c); int d = Mul(input[4], KC1) + Mul(input[12], KC2);
Store(dst, reference, 3, i, a - d); tmp[0] = a + d;
tmp = tmp.Slice(1); tmp[1] = b + c;
tmp[2] = b - c;
tmp[3] = a - d;
tmp = tmp.Slice(4);
input = input.Slice(1);
}
tmp = scratch;
for (i = 0; i < 4; i++)
{
// horizontal pass.
int dc = tmp[0] + 4;
int a = dc + tmp[8];
int b = dc - tmp[8];
int c = Mul(tmp[4], KC2) - Mul(tmp[12], KC1);
int d = Mul(tmp[4], KC1) + Mul(tmp[12], KC2);
Store(dst, reference, 0, i, a + d);
Store(dst, reference, 1, i, b + c);
Store(dst, reference, 2, i, b - c);
Store(dst, reference, 3, i, a - d);
tmp = tmp.Slice(1);
}
} }
} }
public static void FTransform2(Span<byte> src, Span<byte> reference, Span<short> output, Span<short> output2) #if SUPPORTS_RUNTIME_INTRINSICS
private static void InverseTransformVerticalPass(Vector128<long> in0, Vector128<long> in2, Vector128<long> in1, Vector128<long> in3, out Vector128<short> tmp0, out Vector128<short> tmp1, out Vector128<short> tmp2, out Vector128<short> tmp3)
{ {
FTransform(src, reference, output); Vector128<short> a = Sse2.Add(in0.AsInt16(), in2.AsInt16());
FTransform(src.Slice(4), reference.Slice(4), output2); Vector128<short> b = Sse2.Subtract(in0.AsInt16(), in2.AsInt16());
// c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
Vector128<short> c1 = Sse2.MultiplyHigh(in1.AsInt16(), K2);
Vector128<short> c2 = Sse2.MultiplyHigh(in3.AsInt16(), K1);
Vector128<short> c3 = Sse2.Subtract(in1.AsInt16(), in3.AsInt16());
Vector128<short> c4 = Sse2.Subtract(c1, c2);
Vector128<short> c = Sse2.Add(c3, c4);
// d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
Vector128<short> d1 = Sse2.MultiplyHigh(in1.AsInt16(), K1);
Vector128<short> d2 = Sse2.MultiplyHigh(in3.AsInt16(), K2);
Vector128<short> d3 = Sse2.Add(in1.AsInt16(), in3.AsInt16());
Vector128<short> d4 = Sse2.Add(d1, d2);
Vector128<short> d = Sse2.Add(d3, d4);
// Second pass.
tmp0 = Sse2.Add(a, d);
tmp1 = Sse2.Add(b, c);
tmp2 = Sse2.Subtract(b, c);
tmp3 = Sse2.Subtract(a, d);
} }
public static void FTransform(Span<byte> src, Span<byte> reference, Span<short> output) private static void InverseTransformHorizontalPass(Vector128<long> t0, Vector128<long> t2, Vector128<long> t1, Vector128<long> t3, out Vector128<short> shifted0, out Vector128<short> shifted1, out Vector128<short> shifted2, out Vector128<short> shifted3)
{
Vector128<short> dc = Sse2.Add(t0.AsInt16(), Four);
Vector128<short> a = Sse2.Add(dc, t2.AsInt16());
Vector128<short> b = Sse2.Subtract(dc, t2.AsInt16());
// c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
Vector128<short> c1 = Sse2.MultiplyHigh(t1.AsInt16(), K2);
Vector128<short> c2 = Sse2.MultiplyHigh(t3.AsInt16(), K1);
Vector128<short> c3 = Sse2.Subtract(t1.AsInt16(), t3.AsInt16());
Vector128<short> c4 = Sse2.Subtract(c1, c2);
Vector128<short> c = Sse2.Add(c3, c4);
// d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
Vector128<short> d1 = Sse2.MultiplyHigh(t1.AsInt16(), K1);
Vector128<short> d2 = Sse2.MultiplyHigh(t3.AsInt16(), K2);
Vector128<short> d3 = Sse2.Add(t1.AsInt16(), t3.AsInt16());
Vector128<short> d4 = Sse2.Add(d1, d2);
Vector128<short> d = Sse2.Add(d3, d4);
// Second pass.
Vector128<short> tmp0 = Sse2.Add(a, d);
Vector128<short> tmp1 = Sse2.Add(b, c);
Vector128<short> tmp2 = Sse2.Subtract(b, c);
Vector128<short> tmp3 = Sse2.Subtract(a, d);
shifted0 = Sse2.ShiftRightArithmetic(tmp0, 3);
shifted1 = Sse2.ShiftRightArithmetic(tmp1, 3);
shifted2 = Sse2.ShiftRightArithmetic(tmp2, 3);
shifted3 = Sse2.ShiftRightArithmetic(tmp3, 3);
}
#endif
/// <summary>
/// Runs <see cref="FTransform"/> twice: once on the data at the start of <paramref name="src"/> and
/// <paramref name="reference"/>, and once on the data starting 4 elements further into both spans.
/// </summary>
/// <param name="src">The source samples.</param>
/// <param name="reference">The reference samples that correspond to <paramref name="src"/>.</param>
/// <param name="output">Receives the result of the first transform.</param>
/// <param name="output2">Receives the result of the second transform.</param>
/// <param name="scratch">Temporary buffer handed to both transforms; its content is overwritten.</param>
public static void FTransform2(Span<byte> src, Span<byte> reference, Span<short> output, Span<short> output2, Span<int> scratch)
{
    // Offset of the second block relative to the first one.
    const int SecondBlockOffset = 4;

    FTransform(src, reference, output, scratch);

    // The slices are taken only after the first transform has run, so a too-short span fails at the same point as before.
    Span<byte> secondSrc = src.Slice(SecondBlockOffset);
    Span<byte> secondReference = reference.Slice(SecondBlockOffset);
    FTransform(secondSrc, secondReference, output2, scratch);
}
public static void FTransform(Span<byte> src, Span<byte> reference, Span<short> output, Span<int> scratch)
{ {
int i; int i;
int[] tmp = new int[16]; Span<int> tmp = scratch.Slice(0, 16);
int srcIdx = 0; int srcIdx = 0;
int refIdx = 0; int refIdx = 0;
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
@ -160,9 +419,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
public static void FTransformWht(Span<short> input, Span<short> output) public static void FTransformWht(Span<short> input, Span<short> output, Span<int> scratch)
{ {
int[] tmp = new int[16]; Span<int> tmp = scratch.Slice(0, 16);
int i; int i;
int inputIdx = 0; int inputIdx = 0;
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
@ -234,11 +494,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// Left samples are top[-5 .. -2], top_left is top[-1], top are // Left samples are top[-5 .. -2], top_left is top[-1], top are
// located at top[0..3], and top right is top[4..7] // located at top[0..3], and top right is top[4..7]
public static void EncPredLuma4(Span<byte> dst, Span<byte> top, int topOffset) public static void EncPredLuma4(Span<byte> dst, Span<byte> top, int topOffset, Span<byte> vals)
{ {
Dc4(dst.Slice(I4DC4), top, topOffset); Dc4(dst.Slice(I4DC4), top, topOffset);
Tm4(dst.Slice(I4TM4), top, topOffset); Tm4(dst.Slice(I4TM4), top, topOffset);
Ve4(dst.Slice(I4VE4), top, topOffset); Ve4(dst.Slice(I4VE4), top, topOffset, vals);
He4(dst.Slice(I4HE4), top, topOffset); He4(dst.Slice(I4HE4), top, topOffset);
Rd4(dst.Slice(I4RD4), top, topOffset); Rd4(dst.Slice(I4RD4), top, topOffset);
Vr4(dst.Slice(I4VR4), top, topOffset); Vr4(dst.Slice(I4VR4), top, topOffset);
@ -395,20 +655,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
private static void Ve4(Span<byte> dst, Span<byte> top, int topOffset) private static void Ve4(Span<byte> dst, Span<byte> top, int topOffset, Span<byte> vals)
{ {
// vertical // vertical
byte[] vals = vals[0] = LossyUtils.Avg3(top[topOffset - 1], top[topOffset], top[topOffset + 1]);
{ vals[1] = LossyUtils.Avg3(top[topOffset], top[topOffset + 1], top[topOffset + 2]);
LossyUtils.Avg3(top[topOffset - 1], top[topOffset], top[topOffset + 1]), vals[2] = LossyUtils.Avg3(top[topOffset + 1], top[topOffset + 2], top[topOffset + 3]);
LossyUtils.Avg3(top[topOffset], top[topOffset + 1], top[topOffset + 2]), vals[3] = LossyUtils.Avg3(top[topOffset + 2], top[topOffset + 3], top[topOffset + 4]);
LossyUtils.Avg3(top[topOffset + 1], top[topOffset + 2], top[topOffset + 3]),
LossyUtils.Avg3(top[topOffset + 2], top[topOffset + 3], top[topOffset + 4])
};
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
vals.AsSpan().CopyTo(dst.Slice(i * WebpConstants.Bps)); vals.CopyTo(dst.Slice(i * WebpConstants.Bps));
} }
} }

22
src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs

@ -8,6 +8,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
internal class Vp8Histogram internal class Vp8Histogram
{ {
/// <summary>
/// Temporary storage for <c>Vp8FTransform</c>, which clears it before every use.
/// </summary>
private readonly int[] scratch = new int[16];
/// <summary>
/// Receives the 16 coefficients that <c>Vp8FTransform</c> produces for the block currently handled by <c>CollectHistogram</c>.
/// </summary>
private readonly short[] output = new short[16];
/// <summary>
/// Bin counters used by <c>CollectHistogram</c>: cleared at the start of each call, incremented once per
/// coefficient at the index of its clipped magnitude, and finally passed to <c>SetHistogramData</c>.
/// </summary>
private readonly int[] distribution = new int[MaxCoeffThresh + 1];
/// <summary> /// <summary>
/// Size of histogram used by CollectHistogram. /// Size of histogram used by CollectHistogram.
/// </summary> /// </summary>
@ -40,23 +46,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public void CollectHistogram(Span<byte> reference, Span<byte> pred, int startBlock, int endBlock) public void CollectHistogram(Span<byte> reference, Span<byte> pred, int startBlock, int endBlock)
{ {
int j; int j;
int[] distribution = new int[MaxCoeffThresh + 1]; this.distribution.AsSpan().Clear();
for (j = startBlock; j < endBlock; j++) for (j = startBlock; j < endBlock; j++)
{ {
short[] output = new short[16]; this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), this.output);
this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), output);
// Convert coefficients to bin. // Convert coefficients to bin.
for (int k = 0; k < 16; ++k) for (int k = 0; k < 16; ++k)
{ {
int v = Math.Abs(output[k]) >> 3; int v = Math.Abs(this.output[k]) >> 3;
int clippedValue = ClipMax(v, MaxCoeffThresh); int clippedValue = ClipMax(v, MaxCoeffThresh);
++distribution[clippedValue]; ++this.distribution[clippedValue];
} }
} }
this.SetHistogramData(distribution); this.SetHistogramData(this.distribution);
} }
public void Merge(Vp8Histogram other) public void Merge(Vp8Histogram other)
@ -97,7 +101,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
private void Vp8FTransform(Span<byte> src, Span<byte> reference, Span<short> output) private void Vp8FTransform(Span<byte> src, Span<byte> reference, Span<short> output)
{ {
int i; int i;
int[] tmp = new int[16]; Span<int> tmp = this.scratch;
tmp.Clear();
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
{ {
int d0 = src[0] - reference[0]; // 9bit dynamic range ([-255,255]) int d0 = src[0] - reference[0]; // 9bit dynamic range ([-255,255])

38
src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs

@ -3,7 +3,7 @@
namespace SixLabors.ImageSharp.Formats.Webp.Lossy namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
internal class Vp8Matrix internal unsafe struct Vp8Matrix
{ {
private static readonly int[][] BiasMatrices = private static readonly int[][] BiasMatrices =
{ {
@ -23,41 +23,29 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
private const int SharpenBits = 11; private const int SharpenBits = 11;
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="Vp8Matrix"/> class. /// The quantizer steps.
/// </summary> /// </summary>
public Vp8Matrix() public fixed ushort Q[16];
{
this.Q = new ushort[16];
this.IQ = new ushort[16];
this.Bias = new uint[16];
this.ZThresh = new uint[16];
this.Sharpen = new short[16];
}
/// <summary>
/// Gets the quantizer steps.
/// </summary>
public ushort[] Q { get; }
/// <summary> /// <summary>
/// Gets the reciprocals, fixed point. /// The reciprocals, fixed point.
/// </summary> /// </summary>
public ushort[] IQ { get; } public fixed ushort IQ[16];
/// <summary> /// <summary>
/// Gets the rounding bias. /// The rounding bias.
/// </summary> /// </summary>
public uint[] Bias { get; } public fixed uint Bias[16];
/// <summary> /// <summary>
/// Gets the value below which a coefficient is zeroed. /// The value below which a coefficient is zeroed.
/// </summary> /// </summary>
public uint[] ZThresh { get; } public fixed uint ZThresh[16];
/// <summary> /// <summary>
/// Gets the frequency boosters for slight sharpening. /// The frequency boosters for slight sharpening.
/// </summary> /// </summary>
public short[] Sharpen { get; } public fixed short Sharpen[16];
/// <summary> /// <summary>
/// Returns the average quantizer. /// Returns the average quantizer.
@ -72,7 +60,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int isAcCoeff = i > 0 ? 1 : 0; int isAcCoeff = i > 0 ? 1 : 0;
int bias = BiasMatrices[type][isAcCoeff]; int bias = BiasMatrices[type][isAcCoeff];
this.IQ[i] = (ushort)((1 << WebpConstants.QFix) / this.Q[i]); this.IQ[i] = (ushort)((1 << WebpConstants.QFix) / this.Q[i]);
this.Bias[i] = (uint)this.BIAS(bias); this.Bias[i] = (uint)BIAS(bias);
// zthresh is the exact value such that QUANTDIV(coeff, iQ, B) is: // zthresh is the exact value such that QUANTDIV(coeff, iQ, B) is:
// * zero if coeff <= zthresh // * zero if coeff <= zthresh
@ -106,6 +94,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return (sum + 8) >> 4; return (sum + 8) >> 4;
} }
private int BIAS(int b) => b << (WebpConstants.QFix - 8); private static int BIAS(int b) => b << (WebpConstants.QFix - 8);
} }
} }

11
src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs

@ -1,6 +1,8 @@
// Copyright (c) Six Labors. // Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System;
namespace SixLabors.ImageSharp.Formats.Webp.Lossy namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
/// <summary> /// <summary>
@ -93,6 +95,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// </summary> /// </summary>
public int[,] Derr { get; } public int[,] Derr { get; }
/// <summary>
/// Resets every element of the level, mode and error buffers of this instance to its default value.
/// The buffers themselves are kept, only their content is zeroed.
/// </summary>
public void Clear()
{
    // Array.Clear works on the abstract Array type, so the same helper also covers the two-dimensional Derr.
    void Zero(Array buffer) => Array.Clear(buffer, 0, buffer.Length);

    Zero(this.YDcLevels);
    Zero(this.YAcLevels);
    Zero(this.UvLevels);
    Zero(this.ModesI4);
    Zero(this.Derr);
}
public void InitScore() public void InitScore()
{ {
this.D = 0; this.D = 0;

7
src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs

@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System; using System;
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Webp.Lossy namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
@ -16,7 +17,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public int CoeffType { get; set; } public int CoeffType { get; set; }
public short[] Coeffs { get; set; } public short[] Coeffs { get; } = new short[16];
public Vp8BandProbas[] Prob { get; set; } public Vp8BandProbas[] Prob { get; set; }
@ -31,6 +32,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.Prob = prob.Coeffs[this.CoeffType]; this.Prob = prob.Coeffs[this.CoeffType];
this.Stats = prob.Stats[this.CoeffType]; this.Stats = prob.Stats[this.CoeffType];
this.Costs = prob.RemappedCosts[this.CoeffType]; this.Costs = prob.RemappedCosts[this.CoeffType];
this.Coeffs.AsSpan().Clear();
} }
public void SetCoeffs(Span<short> coeffs) public void SetCoeffs(Span<short> coeffs)
@ -46,7 +48,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
this.Coeffs = coeffs.Slice(0, 16).ToArray(); coeffs.Slice(0, 16).CopyTo(this.Coeffs);
} }
// Simulate block coding, but only record statistics. // Simulate block coding, but only record statistics.
@ -150,6 +152,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return cost; return cost;
} }
[MethodImpl(InliningOptions.ShortMethod)]
private static int LevelCost(Span<ushort> table, int level) private static int LevelCost(Span<ushort> table, int level)
=> WebpLookupTables.Vp8LevelFixedCosts[level] + table[level > WebpConstants.MaxVariableLevel ? WebpConstants.MaxVariableLevel : level]; => WebpLookupTables.Vp8LevelFixedCosts[level] + table[level > WebpConstants.MaxVariableLevel ? WebpConstants.MaxVariableLevel : level];

14
src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs

@ -8,19 +8,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
internal class Vp8SegmentInfo internal class Vp8SegmentInfo
{ {
/// <summary> /// <summary>
/// Gets or sets the quantization matrix y1. /// Gets the quantization matrix y1.
/// </summary> /// </summary>
public Vp8Matrix Y1 { get; set; } #pragma warning disable SA1401 // Fields should be private
public Vp8Matrix Y1;
/// <summary> /// <summary>
/// Gets or sets the quantization matrix y2. /// Gets the quantization matrix y2.
/// </summary> /// </summary>
public Vp8Matrix Y2 { get; set; } public Vp8Matrix Y2;
/// <summary> /// <summary>
/// Gets or sets the quantization matrix uv. /// Gets the quantization matrix uv.
/// </summary> /// </summary>
public Vp8Matrix Uv { get; set; } public Vp8Matrix Uv;
#pragma warning restore SA1401 // Fields should be private
/// <summary> /// <summary>
/// Gets or sets the quant-susceptibility, range [-127,127]. Zero is neutral. Lower values indicate a lower risk of blurriness. /// Gets or sets the quant-susceptibility, range [-127,127]. Zero is neutral. Lower values indicate a lower risk of blurriness.

54
src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs

@ -34,6 +34,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// </summary> /// </summary>
private readonly Configuration configuration; private readonly Configuration configuration;
/// <summary>
/// Scratch buffer to reduce allocations.
/// </summary>
private readonly int[] scratch = new int[16];
/// <summary>
/// Another scratch buffer to reduce allocations.
/// </summary>
private readonly byte[] scratchBytes = new byte[4];
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="WebpLossyDecoder"/> class. /// Initializes a new instance of the <see cref="WebpLossyDecoder"/> class.
/// </summary> /// </summary>
@ -395,7 +405,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
LossyUtils.TM4(dst, yuv, offset); LossyUtils.TM4(dst, yuv, offset);
break; break;
case 2: case 2:
LossyUtils.VE4(dst, yuv, offset); LossyUtils.VE4(dst, yuv, offset, this.scratchBytes);
break; break;
case 3: case 3:
LossyUtils.HE4(dst, yuv, offset); LossyUtils.HE4(dst, yuv, offset);
@ -420,7 +430,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
break; break;
} }
this.DoTransform(bits, coeffs.AsSpan(n * 16), dst); this.DoTransform(bits, coeffs.AsSpan(n * 16), dst, this.scratch);
} }
} }
else else
@ -456,7 +466,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
for (int n = 0; n < 16; ++n, bits <<= 2) for (int n = 0; n < 16; ++n, bits <<= 2)
{ {
this.DoTransform(bits, coeffs.AsSpan(n * 16), yDst.Slice(WebpConstants.Scan[n])); this.DoTransform(bits, coeffs.AsSpan(n * 16), yDst.Slice(WebpConstants.Scan[n]), this.scratch);
} }
} }
} }
@ -496,8 +506,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
break; break;
} }
this.DoUVTransform(bitsUv, coeffs.AsSpan(16 * 16), uDst); this.DoUVTransform(bitsUv, coeffs.AsSpan(16 * 16), uDst, this.scratch);
this.DoUVTransform(bitsUv >> 8, coeffs.AsSpan(20 * 16), vDst); this.DoUVTransform(bitsUv >> 8, coeffs.AsSpan(20 * 16), vDst, this.scratch);
// Stash away top samples for next block. // Stash away top samples for next block.
if (mby < dec.MbHeight - 1) if (mby < dec.MbHeight - 1)
@ -737,21 +747,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
int xStep = 3; int xStep = 3;
int lastPixelPair = (len - 1) >> 1; int lastPixelPair = (len - 1) >> 1;
uint tluv = LossyUtils.LoadUv(topU[0], topV[0]); // top-left sample uint tluv = YuvConversion.LoadUv(topU[0], topV[0]); // top-left sample
uint luv = LossyUtils.LoadUv(curU[0], curV[0]); // left-sample uint luv = YuvConversion.LoadUv(curU[0], curV[0]); // left-sample
uint uv0 = ((3 * tluv) + luv + 0x00020002u) >> 2; uint uv0 = ((3 * tluv) + luv + 0x00020002u) >> 2;
LossyUtils.YuvToBgr(topY[0], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst); YuvConversion.YuvToBgr(topY[0], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst);
if (bottomY != null) if (bottomY != null)
{ {
uv0 = ((3 * luv) + tluv + 0x00020002u) >> 2; uv0 = ((3 * luv) + tluv + 0x00020002u) >> 2;
LossyUtils.YuvToBgr(bottomY[0], (int)uv0 & 0xff, (int)(uv0 >> 16), bottomDst); YuvConversion.YuvToBgr(bottomY[0], (int)uv0 & 0xff, (int)(uv0 >> 16), bottomDst);
} }
for (int x = 1; x <= lastPixelPair; x++) for (int x = 1; x <= lastPixelPair; x++)
{ {
uint tuv = LossyUtils.LoadUv(topU[x], topV[x]); // top sample uint tuv = YuvConversion.LoadUv(topU[x], topV[x]); // top sample
uint uv = LossyUtils.LoadUv(curU[x], curV[x]); // sample uint uv = YuvConversion.LoadUv(curU[x], curV[x]); // sample
// Precompute invariant values associated with first and second diagonals. // Precompute invariant values associated with first and second diagonals.
uint avg = tluv + tuv + luv + uv + 0x00080008u; uint avg = tluv + tuv + luv + uv + 0x00080008u;
@ -760,15 +770,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
uv0 = (diag12 + tluv) >> 1; uv0 = (diag12 + tluv) >> 1;
uint uv1 = (diag03 + tuv) >> 1; uint uv1 = (diag03 + tuv) >> 1;
int xMul2 = x * 2; int xMul2 = x * 2;
LossyUtils.YuvToBgr(topY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((xMul2 - 1) * xStep)); YuvConversion.YuvToBgr(topY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((xMul2 - 1) * xStep));
LossyUtils.YuvToBgr(topY[xMul2 - 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), topDst.Slice((xMul2 - 0) * xStep)); YuvConversion.YuvToBgr(topY[xMul2 - 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), topDst.Slice((xMul2 - 0) * xStep));
if (bottomY != null) if (bottomY != null)
{ {
uv0 = (diag03 + luv) >> 1; uv0 = (diag03 + luv) >> 1;
uv1 = (diag12 + uv) >> 1; uv1 = (diag12 + uv) >> 1;
LossyUtils.YuvToBgr(bottomY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((xMul2 - 1) * xStep)); YuvConversion.YuvToBgr(bottomY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((xMul2 - 1) * xStep));
LossyUtils.YuvToBgr(bottomY[xMul2 + 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), bottomDst.Slice((xMul2 + 0) * xStep)); YuvConversion.YuvToBgr(bottomY[xMul2 + 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), bottomDst.Slice((xMul2 + 0) * xStep));
} }
tluv = tuv; tluv = tuv;
@ -778,21 +788,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
if ((len & 1) == 0) if ((len & 1) == 0)
{ {
uv0 = ((3 * tluv) + luv + 0x00020002u) >> 2; uv0 = ((3 * tluv) + luv + 0x00020002u) >> 2;
LossyUtils.YuvToBgr(topY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((len - 1) * xStep)); YuvConversion.YuvToBgr(topY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((len - 1) * xStep));
if (bottomY != null) if (bottomY != null)
{ {
uv0 = ((3 * luv) + tluv + 0x00020002u) >> 2; uv0 = ((3 * luv) + tluv + 0x00020002u) >> 2;
LossyUtils.YuvToBgr(bottomY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((len - 1) * xStep)); YuvConversion.YuvToBgr(bottomY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((len - 1) * xStep));
} }
} }
} }
private void DoTransform(uint bits, Span<short> src, Span<byte> dst) private void DoTransform(uint bits, Span<short> src, Span<byte> dst, Span<int> scratch)
{ {
switch (bits >> 30) switch (bits >> 30)
{ {
case 3: case 3:
LossyUtils.TransformOne(src, dst); LossyUtils.TransformOne(src, dst, scratch);
break; break;
case 2: case 2:
LossyUtils.TransformAc3(src, dst); LossyUtils.TransformAc3(src, dst);
@ -803,7 +813,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
private void DoUVTransform(uint bits, Span<short> src, Span<byte> dst) private void DoUVTransform(uint bits, Span<short> src, Span<byte> dst, Span<int> scratch)
{ {
// any non-zero coeff at all? // any non-zero coeff at all?
if ((bits & 0xff) > 0) if ((bits & 0xff) > 0)
@ -811,7 +821,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// any non-zero AC coefficient? // any non-zero AC coefficient?
if ((bits & 0xaa) > 0) if ((bits & 0xaa) > 0)
{ {
LossyUtils.TransformUv(src, dst); // note we don't use the AC3 variant for U/V. LossyUtils.TransformUv(src, dst, scratch); // note we don't use the AC3 variant for U/V.
} }
else else
{ {
@ -884,7 +894,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
if (nz > 1) if (nz > 1)
{ {
// More than just the DC -> perform the full transform. // More than just the DC -> perform the full transform.
LossyUtils.TransformWht(dc, dst); LossyUtils.TransformWht(dc, dst, this.scratch);
} }
else else
{ {

31
src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs

@ -299,5 +299,36 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
uv = (uv + rounding + (128 << (YuvFix + 2))) >> (YuvFix + 2); uv = (uv + rounding + (128 << (YuvFix + 2))) >> (YuvFix + 2);
return (uv & ~0xff) == 0 ? uv : uv < 0 ? 0 : 255; return (uv & ~0xff) == 0 ? uv : uv < 0 ? 0 : 255;
} }
/// <summary>
/// Packs a u and a v sample into one 32 bit value so both can be processed together:
/// u occupies bits 0..7 and v occupies bits 16..23.
/// </summary>
/// <param name="u">The u sample.</param>
/// <param name="v">The v sample.</param>
/// <returns>The packed value.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint LoadUv(byte u, byte v)
{
    uint lowHalf = u;
    uint highHalf = (uint)v << 16;
    return lowHalf | highHalf;
}
/// <summary>
/// Converts one YUV sample to three bytes and stores them in blue, green, red order.
/// </summary>
/// <param name="y">The y sample.</param>
/// <param name="u">The u sample.</param>
/// <param name="v">The v sample.</param>
/// <param name="bgr">The destination; indices 0, 1 and 2 receive blue, green and red.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void YuvToBgr(int y, int u, int v, Span<byte> bgr)
{
    byte red = (byte)YuvToR(y, v);
    byte green = (byte)YuvToG(y, u, v);
    byte blue = (byte)YuvToB(y, u);

    // Highest index first: a destination that is too short fails before anything has been written.
    bgr[2] = red;
    bgr[1] = green;
    bgr[0] = blue;
}
/// <summary>
/// Computes the blue channel from the y and u samples using fixed-point arithmetic.
/// The intermediate sum still carries the 6 extra bits that <see cref="Clip8"/> shifts away.
/// </summary>
/// <param name="y">The y sample.</param>
/// <param name="u">The u sample.</param>
/// <returns>The blue value, clipped to [0, 255].</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static int YuvToB(int y, int u)
{
    int lumaTerm = MultHi(y, 19077);
    int chromaTerm = MultHi(u, 33050);
    return Clip8(lumaTerm + chromaTerm - 17685);
}
/// <summary>
/// Computes the green channel from the y, u and v samples using fixed-point arithmetic.
/// The intermediate sum still carries the 6 extra bits that <see cref="Clip8"/> shifts away.
/// </summary>
/// <param name="y">The y sample.</param>
/// <param name="u">The u sample.</param>
/// <param name="v">The v sample.</param>
/// <returns>The green value, clipped to [0, 255].</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static int YuvToG(int y, int u, int v)
{
    int lumaTerm = MultHi(y, 19077);
    int chromaUTerm = MultHi(u, 6419);
    int chromaVTerm = MultHi(v, 13320);
    return Clip8(lumaTerm - chromaUTerm - chromaVTerm + 8708);
}
/// <summary>
/// Computes the red channel from the y and v samples using fixed-point arithmetic.
/// The intermediate sum still carries the 6 extra bits that <see cref="Clip8"/> shifts away.
/// </summary>
/// <param name="y">The y sample.</param>
/// <param name="v">The v sample.</param>
/// <returns>The red value, clipped to [0, 255].</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static int YuvToR(int y, int v)
{
    int lumaTerm = MultHi(y, 19077);
    int chromaTerm = MultHi(v, 26149);
    return Clip8(lumaTerm + chromaTerm - 14234);
}
/// <summary>
/// Multiplies a value by a coefficient and drops the lowest 8 bits of the product.
/// </summary>
/// <param name="v">The value.</param>
/// <param name="coeff">The coefficient.</param>
/// <returns>The product, arithmetically shifted right by 8.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static int MultHi(int v, int coeff)
{
    int product = v * coeff;
    return product >> 8;
}
/// <summary>
/// Converts a value carrying 6 extra low bits to a byte: values in [0, (256 &lt;&lt; 6) - 1] are shifted
/// right by 6, negative values become 0 and everything else becomes 255.
/// </summary>
/// <param name="v">The value to clip.</param>
/// <returns>The clipped byte.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static byte Clip8(int v)
{
    const int ExtraBits = 6;
    const int InRangeMask = (256 << ExtraBits) - 1;

    // No bit outside the mask is set: the value is non-negative and fits into 8 bits after the shift.
    if ((v & ~InRangeMask) == 0)
    {
        return (byte)(v >> ExtraBits);
    }

    return v < 0 ? (byte)0 : (byte)255;
}
} }
} }

56
src/ImageSharp/Formats/Webp/WebpCommonUtils.cs

@ -16,6 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Webp
/// </summary> /// </summary>
internal static class WebpCommonUtils internal static class WebpCommonUtils
{ {
#if SUPPORTS_RUNTIME_INTRINSICS
// The vector constants below are created once as static fields and shared by the row checks and
// the IsNoneOpaque64Bytes / IsNoneOpaque32Bytes helpers.

// 256 bit mask that keeps byte 3 of every 4-byte group and zeroes the other three bytes.
// Presumably byte 3 is the alpha channel of a 4-byte pixel, as the name suggests - TODO confirm against the pixel layout.
private static readonly Vector256<byte> AlphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
// 256 bit vector with every byte set to 0x80; the packed, masked input is compared against it.
// After the mask, a set top bit in the kept byte makes the 32 bit lane negative, and the two signed
// saturating packs turn every negative lane into 0x80.
// NOTE(review): this means any kept byte >= 0x80 ends up as 0x80, not only 0xFF - confirm that is intended.
private static readonly Vector256<byte> All0x80Vector256 = Vector256.Create((byte)0x80).AsByte();
// 128 bit variant of AlphaMaskVector256.
private static readonly Vector128<byte> AlphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
// 128 bit variant of All0x80Vector256.
private static readonly Vector128<byte> All0x80 = Vector128.Create((byte)0x80).AsByte();
#endif
/// <summary> /// <summary>
/// Checks if the pixel row is not opaque. /// Checks if the pixel row is not opaque.
/// </summary> /// </summary>
@ -27,11 +37,6 @@ namespace SixLabors.ImageSharp.Formats.Webp
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row); ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
var alphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector256<byte> all0x80Vector256 = Vector256.Create((byte)0x80).AsByte();
var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector128<byte> all0x80 = Vector128.Create((byte)0x80).AsByte();
int i = 0; int i = 0;
int length = (row.Length * 4) - 3; int length = (row.Length * 4) - 3;
fixed (byte* src = rowBytes) fixed (byte* src = rowBytes)
@ -42,14 +47,14 @@ namespace SixLabors.ImageSharp.Formats.Webp
Vector256<byte> a1 = Avx.LoadVector256(src + i + 32).AsByte(); Vector256<byte> a1 = Avx.LoadVector256(src + i + 32).AsByte();
Vector256<byte> a2 = Avx.LoadVector256(src + i + 64).AsByte(); Vector256<byte> a2 = Avx.LoadVector256(src + i + 64).AsByte();
Vector256<byte> a3 = Avx.LoadVector256(src + i + 96).AsByte(); Vector256<byte> a3 = Avx.LoadVector256(src + i + 96).AsByte();
Vector256<int> b0 = Avx2.And(a0, alphaMaskVector256).AsInt32(); Vector256<int> b0 = Avx2.And(a0, AlphaMaskVector256).AsInt32();
Vector256<int> b1 = Avx2.And(a1, alphaMaskVector256).AsInt32(); Vector256<int> b1 = Avx2.And(a1, AlphaMaskVector256).AsInt32();
Vector256<int> b2 = Avx2.And(a2, alphaMaskVector256).AsInt32(); Vector256<int> b2 = Avx2.And(a2, AlphaMaskVector256).AsInt32();
Vector256<int> b3 = Avx2.And(a3, alphaMaskVector256).AsInt32(); Vector256<int> b3 = Avx2.And(a3, AlphaMaskVector256).AsInt32();
Vector256<short> c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16(); Vector256<short> c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16();
Vector256<short> c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16(); Vector256<short> c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16();
Vector256<byte> d = Avx2.PackSignedSaturate(c0, c1).AsByte(); Vector256<byte> d = Avx2.PackSignedSaturate(c0, c1).AsByte();
Vector256<byte> bits = Avx2.CompareEqual(d, all0x80Vector256); Vector256<byte> bits = Avx2.CompareEqual(d, All0x80Vector256);
int mask = Avx2.MoveMask(bits); int mask = Avx2.MoveMask(bits);
if (mask != -1) if (mask != -1)
{ {
@ -59,7 +64,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 64 <= length; i += 64) for (; i + 64 <= length; i += 64)
{ {
if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque64Bytes(src, i))
{ {
return true; return true;
} }
@ -67,7 +72,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 32 <= length; i += 32) for (; i + 32 <= length; i += 32)
{ {
if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque32Bytes(src, i))
{ {
return true; return true;
} }
@ -85,16 +90,13 @@ namespace SixLabors.ImageSharp.Formats.Webp
else if (Sse2.IsSupported) else if (Sse2.IsSupported)
{ {
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row); ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector128<byte> all0x80 = Vector128.Create((byte)0x80).AsByte();
int i = 0; int i = 0;
int length = (row.Length * 4) - 3; int length = (row.Length * 4) - 3;
fixed (byte* src = rowBytes) fixed (byte* src = rowBytes)
{ {
for (; i + 64 <= length; i += 64) for (; i + 64 <= length; i += 64)
{ {
if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque64Bytes(src, i))
{ {
return true; return true;
} }
@ -102,7 +104,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 32 <= length; i += 32) for (; i + 32 <= length; i += 32)
{ {
if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque32Bytes(src, i))
{ {
return true; return true;
} }
@ -133,20 +135,20 @@ namespace SixLabors.ImageSharp.Formats.Webp
} }
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i, Vector128<byte> alphaMask, Vector128<byte> all0x80) private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i)
{ {
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte(); Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte();
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte(); Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
Vector128<byte> a2 = Sse2.LoadVector128(src + i + 32).AsByte(); Vector128<byte> a2 = Sse2.LoadVector128(src + i + 32).AsByte();
Vector128<byte> a3 = Sse2.LoadVector128(src + i + 48).AsByte(); Vector128<byte> a3 = Sse2.LoadVector128(src + i + 48).AsByte();
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32(); Vector128<int> b0 = Sse2.And(a0, AlphaMask).AsInt32();
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32(); Vector128<int> b1 = Sse2.And(a1, AlphaMask).AsInt32();
Vector128<int> b2 = Sse2.And(a2, alphaMask).AsInt32(); Vector128<int> b2 = Sse2.And(a2, AlphaMask).AsInt32();
Vector128<int> b3 = Sse2.And(a3, alphaMask).AsInt32(); Vector128<int> b3 = Sse2.And(a3, AlphaMask).AsInt32();
Vector128<short> c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16(); Vector128<short> c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16();
Vector128<short> c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16(); Vector128<short> c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16();
Vector128<byte> d = Sse2.PackSignedSaturate(c0, c1).AsByte(); Vector128<byte> d = Sse2.PackSignedSaturate(c0, c1).AsByte();
Vector128<byte> bits = Sse2.CompareEqual(d, all0x80); Vector128<byte> bits = Sse2.CompareEqual(d, All0x80);
int mask = Sse2.MoveMask(bits); int mask = Sse2.MoveMask(bits);
if (mask != 0xFFFF) if (mask != 0xFFFF)
{ {
@ -156,15 +158,15 @@ namespace SixLabors.ImageSharp.Formats.Webp
return false; return false;
} }
private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i, Vector128<byte> alphaMask, Vector128<byte> all0x80) private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i)
{ {
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte(); Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte();
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte(); Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32(); Vector128<int> b0 = Sse2.And(a0, AlphaMask).AsInt32();
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32(); Vector128<int> b1 = Sse2.And(a1, AlphaMask).AsInt32();
Vector128<short> c = Sse2.PackSignedSaturate(b0, b1).AsInt16(); Vector128<short> c = Sse2.PackSignedSaturate(b0, b1).AsInt16();
Vector128<byte> d = Sse2.PackSignedSaturate(c, c).AsByte(); Vector128<byte> d = Sse2.PackSignedSaturate(c, c).AsByte();
Vector128<byte> bits = Sse2.CompareEqual(d, all0x80); Vector128<byte> bits = Sse2.CompareEqual(d, All0x80);
int mask = Sse2.MoveMask(bits); int mask = Sse2.MoveMask(bits);
if (mask != 0xFFFF) if (mask != 0xFFFF)
{ {

2
src/ImageSharp/Formats/Webp/WebpDecoderCore.cs

@ -306,7 +306,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
// Check for VP8 magic bytes. // Check for VP8 magic bytes.
this.currentStream.Read(this.buffer, 0, 3); this.currentStream.Read(this.buffer, 0, 3);
if (!this.buffer.AsSpan().Slice(0, 3).SequenceEqual(WebpConstants.Vp8HeaderMagicBytes)) if (!this.buffer.AsSpan(0, 3).SequenceEqual(WebpConstants.Vp8HeaderMagicBytes))
{ {
WebpThrowHelper.ThrowImageFormatException("VP8 magic bytes not found"); WebpThrowHelper.ThrowImageFormatException("VP8 magic bytes not found");
} }

2
src/ImageSharp/Formats/Webp/WebpEncoder.cs

@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
public bool UseAlphaCompression { get; set; } public bool UseAlphaCompression { get; set; }
/// <inheritdoc/> /// <inheritdoc/>
public int EntropyPasses { get; set; } public int EntropyPasses { get; set; } = 1;
/// <inheritdoc/> /// <inheritdoc/>
public int SpatialNoiseShaping { get; set; } = 50; public int SpatialNoiseShaping { get; set; } = 50;

2
src/ImageSharp/Formats/Webp/WebpEncoderCore.cs

@ -4,11 +4,9 @@
using System.IO; using System.IO;
using System.Threading; using System.Threading;
using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Formats.Bmp;
using SixLabors.ImageSharp.Formats.Webp.Lossless; using SixLabors.ImageSharp.Formats.Webp.Lossless;
using SixLabors.ImageSharp.Formats.Webp.Lossy; using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.Metadata;
using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Formats.Webp namespace SixLabors.ImageSharp.Formats.Webp

267
src/ImageSharp/Formats/Webp/WebpLookupTables.cs

@ -2,21 +2,13 @@
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System; using System;
using System.Collections.Generic; using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Webp namespace SixLabors.ImageSharp.Formats.Webp
{ {
#pragma warning disable SA1201 // Elements should appear in the correct order #pragma warning disable SA1201 // Elements should appear in the correct order
internal static class WebpLookupTables internal static class WebpLookupTables
{ {
public static readonly Dictionary<int, byte> Abs0;
public static readonly Dictionary<int, byte> Clip1;
public static readonly Dictionary<int, sbyte> Sclip1;
public static readonly Dictionary<int, sbyte> Sclip2;
public static readonly byte[,][] ModesProba = new byte[10, 10][]; public static readonly byte[,][] ModesProba = new byte[10, 10][];
public static readonly ushort[] GammaToLinearTab = new ushort[256]; public static readonly ushort[] GammaToLinearTab = new ushort[256];
@ -54,6 +46,18 @@ namespace SixLabors.ImageSharp.Formats.Webp
8 + (0 * WebpConstants.Bps), 12 + (0 * WebpConstants.Bps), 8 + (4 * WebpConstants.Bps), 12 + (4 * WebpConstants.Bps) // V 8 + (0 * WebpConstants.Bps), 12 + (0 * WebpConstants.Bps), 8 + (4 * WebpConstants.Bps), 12 + (4 * WebpConstants.Bps) // V
}; };
[MethodImpl(InliningOptions.ShortMethod)]
public static byte Abs0(int x) => Abs0Table[x + 255];
[MethodImpl(InliningOptions.ShortMethod)]
public static sbyte Sclip1(int x) => Sclip1Table[x + 1020];
[MethodImpl(InliningOptions.ShortMethod)]
public static sbyte Sclip2(int x) => Sclip2Table[x + 112];
[MethodImpl(InliningOptions.ShortMethod)]
public static byte Clip1(int x) => Clip1Table[x + 255];
// fixed costs for coding levels, deduce from the coding tree. // fixed costs for coding levels, deduce from the coding tree.
// This is only the part that doesn't depend on the probability state. // This is only the part that doesn't depend on the probability state.
public static readonly short[] Vp8LevelFixedCosts = public static readonly short[] Vp8LevelFixedCosts =
@ -249,7 +253,8 @@ namespace SixLabors.ImageSharp.Formats.Webp
0 0
}; };
public static readonly byte[] NewRange = // This uses C#'s compiler optimization to refer to assembly's static data directly.
public static ReadOnlySpan<byte> NewRange => new byte[]
{ {
// range = ((range + 1) << kVP8Log2Range[range]) - 1 // range = ((range + 1) << kVP8Log2Range[range]) - 1
127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239, 127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239,
@ -567,7 +572,8 @@ namespace SixLabors.ImageSharp.Formats.Webp
}; };
// Paragraph 14.1 // Paragraph 14.1
public static readonly byte[] DcTable = // This uses C#'s compiler optimization to refer to assembly's static data directly.
public static ReadOnlySpan<byte> DcTable => new byte[]
{ {
4, 5, 6, 7, 8, 9, 10, 10, 4, 5, 6, 7, 8, 9, 10, 10,
11, 12, 13, 14, 15, 16, 17, 17, 11, 12, 13, 14, 15, 16, 17, 17,
@ -1042,7 +1048,8 @@ namespace SixLabors.ImageSharp.Formats.Webp
(17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7),
}; };
public static readonly byte[] PrefixEncodeExtraBitsValue = // This uses C#'s compiler optimization to refer to assembly's static data directly.
public static ReadOnlySpan<byte> PrefixEncodeExtraBitsValue => new byte[]
{ {
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3,
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
@ -1233,33 +1240,223 @@ namespace SixLabors.ImageSharp.Formats.Webp
LinearToGammaTab[v] = (int)((255.0d * Math.Pow(scale * v, 1.0d / WebpConstants.Gamma)) + .5); LinearToGammaTab[v] = (int)((255.0d * Math.Pow(scale * v, 1.0d / WebpConstants.Gamma)) + .5);
} }
Abs0 = new Dictionary<int, byte>(); InitializeModesProbabilities();
for (int i = -255; i <= 255; i++) InitializeFixedCostsI4();
{ }
Abs0[i] = (byte)((i < 0) ? -i : i);
}
Clip1 = new Dictionary<int, byte>(); // This uses C#'s compiler optimization to refer to assembly's static data directly.
for (int i = -255; i <= 255 + 255; i++) private static ReadOnlySpan<byte> Abs0Table => new byte[]
{ {
Clip1[i] = (byte)(i < 0 ? 0 : i > 255 ? 255 : i); 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xef,
} 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8, 0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde,
0xdd, 0xdc, 0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0, 0xcf, 0xce, 0xcd,
0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4, 0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc,
0xbb, 0xba, 0xb9, 0xb8, 0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac, 0xab,
0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0, 0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a,
0x99, 0x98, 0x97, 0x96, 0x95, 0x94, 0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89,
0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c, 0x7b, 0x7a, 0x79, 0x78,
0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70, 0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67,
0x66, 0x65, 0x64, 0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58, 0x57, 0x56,
0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45,
0x44, 0x43, 0x42, 0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34,
0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23,
0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12,
0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21,
0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32,
0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43,
0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54,
0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65,
0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9,
0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb,
0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc,
0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed,
0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
0xff
};
Sclip1 = new Dictionary<int, sbyte>(); // This uses C#'s compiler optimization to refer to assembly's static data directly.
for (int i = -1020; i <= 1020; i++) private static ReadOnlySpan<byte> Clip1Table => new byte[]
{ {
Sclip1[i] = (sbyte)(i < -128 ? -128 : i > 127 ? 127 : i); 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
} 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21,
0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32,
0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43,
0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54,
0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65,
0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9,
0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb,
0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc,
0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed,
0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff
};
Sclip2 = new Dictionary<int, sbyte>(); // This uses C#'s compiler optimization to refer to assembly's static data directly.
for (int i = -112; i <= 112; i++) private static ReadOnlySpan<sbyte> Sclip1Table => new sbyte[]
{ {
Sclip2[i] = (sbyte)(i < -16 ? -16 : i > 15 ? 15 : i); -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
} -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -127, -126, -125, -124, -123, -122, -121, -120,
-119, -118, -117, -116, -115, -114, -113, -112, -111, -110, -109, -108, -107, -106, -105, -104, -103,
-102, -101, -100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89, -88, -87, -86, -85, -84, -83,
-82, -81, -80, -79, -78, -77, -76, -75, -74, -73, -72, -71, -70, -69, -68, -67, -66, -65, -64, -63, -62,
-61, -60, -59, -58, -57, -56, -55, -54, -53, -52, -51, -50, -49, -48, -47, -46, -45, -44, -43, -42, -41,
-40, -39, -38, -37, -36, -35, -34, -33, -32, -31, -30, -29, -28, -27, -26, -25, -24, -23, -22, -21, -20,
-19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127
};
InitializeModesProbabilities(); // This uses C#'s compiler optimization to refer to assembly's static data directly.
InitializeFixedCostsI4(); private static ReadOnlySpan<sbyte> Sclip2Table => new sbyte[]
} {
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -15, -14, -13, -12, -11, -10, -9, -8,
-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15
};
private static void InitializeModesProbabilities() private static void InitializeModesProbabilities()
{ {

4
src/ImageSharp/IO/ChunkedMemoryStream.cs

@ -243,7 +243,7 @@ namespace SixLabors.ImageSharp.IO
const string bufferMessage = "Offset subtracted from the buffer length is less than count."; const string bufferMessage = "Offset subtracted from the buffer length is less than count.";
Guard.IsFalse(buffer.Length - offset < count, nameof(buffer), bufferMessage); Guard.IsFalse(buffer.Length - offset < count, nameof(buffer), bufferMessage);
return this.ReadImpl(buffer.AsSpan().Slice(offset, count)); return this.ReadImpl(buffer.AsSpan(offset, count));
} }
#if SUPPORTS_SPAN_STREAM #if SUPPORTS_SPAN_STREAM
@ -359,7 +359,7 @@ namespace SixLabors.ImageSharp.IO
const string bufferMessage = "Offset subtracted from the buffer length is less than count."; const string bufferMessage = "Offset subtracted from the buffer length is less than count.";
Guard.IsFalse(buffer.Length - offset < count, nameof(buffer), bufferMessage); Guard.IsFalse(buffer.Length - offset < count, nameof(buffer), bufferMessage);
this.WriteImpl(buffer.AsSpan().Slice(offset, count)); this.WriteImpl(buffer.AsSpan(offset, count));
} }
#if SUPPORTS_SPAN_STREAM #if SUPPORTS_SPAN_STREAM

51
src/ImageSharp/Image.Decode.cs

@ -58,31 +58,42 @@ namespace SixLabors.ImageSharp
return null; return null;
} }
using (IMemoryOwner<byte> buffer = config.MemoryAllocator.Allocate<byte>(headerSize, AllocationOptions.Clean)) // Header sizes are so small, that headersBuffer will be always stackalloc-ed in practice,
// and heap allocation will never happen, there is no need for the usual try-finally ArrayPool dance.
// The array case is only a safety mechanism following stackalloc best practices.
Span<byte> headersBuffer = headerSize > 512 ? new byte[headerSize] : stackalloc byte[headerSize];
long startPosition = stream.Position;
// Read doesn't always guarantee the full returned length so read a byte
// at a time until we get either our count or hit the end of the stream.
int n = 0;
int i;
do
{ {
Span<byte> bufferSpan = buffer.GetSpan(); i = stream.Read(headersBuffer, n, headerSize - n);
long startPosition = stream.Position; n += i;
}
while (n < headerSize && i > 0);
// Read doesn't always guarantee the full returned length so read a byte stream.Position = startPosition;
// at a time until we get either our count or hit the end of the stream.
int n = 0; // Does the given stream contain enough data to fit in the header for the format
int i; // and does that data match the format specification?
do // Individual formats should still check since they are public.
IImageFormat format = null;
foreach (IImageFormatDetector formatDetector in config.ImageFormatsManager.FormatDetectors)
{
if (formatDetector.HeaderSize <= headerSize)
{ {
i = stream.Read(bufferSpan, n, headerSize - n); IImageFormat attemptFormat = formatDetector.DetectFormat(headersBuffer);
n += i; if (attemptFormat != null)
{
format = attemptFormat;
}
} }
while (n < headerSize && i > 0);
stream.Position = startPosition;
// Does the given stream contain enough data to fit in the header for the format
// and does that data match the format specification?
// Individual formats should still check since they are public.
return config.ImageFormatsManager.FormatDetectors
.Where(x => x.HeaderSize <= headerSize)
.Select(x => x.DetectFormat(buffer.GetSpan())).LastOrDefault(x => x != null);
} }
return format;
} }
/// <summary> /// <summary>

47
src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs

@ -417,19 +417,40 @@ namespace SixLabors.ImageSharp.Processing.Processors.Quantization
for (int r = 1; r < IndexCount; r++) for (int r = 1; r < IndexCount; r++)
{ {
// Currently, RyuJIT hoists the invariants of multi-level nested loop only to the
// immediate outer loop. See https://github.com/dotnet/runtime/issues/61420
// To ensure the calculation doesn't happen repeatedly, hoist some of the calculations
// in the form of ind1* manually.
int ind1R = (r << ((IndexBits * 2) + IndexAlphaBits)) +
(r << (IndexBits + IndexAlphaBits + 1)) +
(r << (IndexBits * 2)) +
(r << (IndexBits + 1)) +
r;
volumeSpan.Clear(); volumeSpan.Clear();
for (int g = 1; g < IndexCount; g++) for (int g = 1; g < IndexCount; g++)
{ {
int ind1G = ind1R +
(g << (IndexBits + IndexAlphaBits)) +
(g << IndexBits) +
g;
int r_g = r + g;
areaSpan.Clear(); areaSpan.Clear();
for (int b = 1; b < IndexCount; b++) for (int b = 1; b < IndexCount; b++)
{ {
int ind1B = ind1G +
((r_g + b) << IndexAlphaBits) +
b;
Moment line = default; Moment line = default;
for (int a = 1; a < IndexAlphaCount; a++) for (int a = 1; a < IndexAlphaCount; a++)
{ {
int ind1 = GetPaletteIndex(r, g, b, a); int ind1 = ind1B + a;
line += momentSpan[ind1]; line += momentSpan[ind1];
areaSpan[a] += line; areaSpan[a] += line;
@ -628,13 +649,35 @@ namespace SixLabors.ImageSharp.Processing.Processors.Quantization
for (int r = cube.RMin + 1; r <= cube.RMax; r++) for (int r = cube.RMin + 1; r <= cube.RMax; r++)
{ {
// Currently, RyuJIT hoists the invariants of multi-level nested loop only to the
// immediate outer loop. See https://github.com/dotnet/runtime/issues/61420
// To ensure the calculation doesn't happen repeatedly, hoist some of the calculations
// in the form of ind1* manually.
int ind1R = (r << ((IndexBits * 2) + IndexAlphaBits)) +
(r << (IndexBits + IndexAlphaBits + 1)) +
(r << (IndexBits * 2)) +
(r << (IndexBits + 1)) +
r;
for (int g = cube.GMin + 1; g <= cube.GMax; g++) for (int g = cube.GMin + 1; g <= cube.GMax; g++)
{ {
int ind1G = ind1R +
(g << (IndexBits + IndexAlphaBits)) +
(g << IndexBits) +
g;
int r_g = r + g;
for (int b = cube.BMin + 1; b <= cube.BMax; b++) for (int b = cube.BMin + 1; b <= cube.BMax; b++)
{ {
int ind1B = ind1G +
((r_g + b) << IndexAlphaBits) +
b;
for (int a = cube.AMin + 1; a <= cube.AMax; a++) for (int a = cube.AMin + 1; a <= cube.AMax; a++)
{ {
tagSpan[GetPaletteIndex(r, g, b, a)] = label; int index = ind1B + a;
tagSpan[index] = label;
} }
} }
} }

2
src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs

@ -216,7 +216,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Transforms
ResizeKernel kernel = this.CreateKernel(dataRowIndex, left, right); ResizeKernel kernel = this.CreateKernel(dataRowIndex, left, right);
Span<double> kernelValues = this.tempValues.AsSpan().Slice(0, kernel.Length); Span<double> kernelValues = this.tempValues.AsSpan(0, kernel.Length);
double sum = 0; double sum = 0;
for (int j = left; j <= right; j++) for (int j = left; j <= right; j++)

49
tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs

@ -76,34 +76,29 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs
return image.Height; return image.Height;
} }
/* Results 17.06.2021 /* Results 04.11.2021
* BenchmarkDotNet=v0.12.0, OS=Windows 10.0.18362 * BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19043.1320 (21H1/May2021Update)
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=3.1.202 .NET SDK=6.0.100-rc.2.21505.57
[Host] : .NET Core 3.1.4 (CoreCLR 4.700.20.20201, CoreFX 4.700.20.22101), X64 RyuJIT [Host] : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT
Job-AQFZAV : .NET Framework 4.8 (4.8.4180.0), X64 RyuJIT Job-WQLXJO : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT
Job-YCDAPQ : .NET Core 2.1.18 (CoreCLR 4.6.28801.04, CoreFX 4.6.28802.05), X64 RyuJIT Job-OJJAMD : .NET Core 3.1.20 (CoreCLR 4.700.21.47003, CoreFX 4.700.21.47101), X64 RyuJIT
Job-WMTYOZ : .NET Core 3.1.4 (CoreCLR 4.700.20.20201, CoreFX 4.700.20.22101), X64 RyuJIT Job-OMFOAS : .NET Framework 4.8 (4.8.4420.0), X64 RyuJIT
IterationCount=3 LaunchCount=1 WarmupCount=3 | Method | Job | Runtime | Arguments | TestImageLossy | TestImageLossless | Mean | Error | StdDev | Gen 0 | Gen 1 | Gen 2 | Allocated |
| Method | Job | Runtime | TestImageLossy | TestImageLossless | Mean | Error | StdDev | Gen 0 | Gen 1 | Gen 2 | Allocated | |--------------------------- |----------- |--------------------- |---------------------- |---------------------- |------------------------- |-----------:|----------:|--------:|---------:|------:|------:|----------:|
|--------------------------- |----------- |-------------- |---------------------- |------------------------- |-----------:|----------:|---------:|----------:|----------:|------:|------------:| | 'Magick Lossy Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 107.9 ms | 28.91 ms | 1.58 ms | - | - | - | 25 KB |
| 'Magick Lossy Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.8 ms | 6.28 ms | 0.34 ms | - | - | - | 17.65 KB | | 'ImageSharp Lossy Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 282.3 ms | 25.40 ms | 1.39 ms | 500.0000 | - | - | 2,428 KB |
| 'ImageSharp Lossy Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 1,145.0 ms | 110.82 ms | 6.07 ms | - | - | - | 2779.53 KB | | 'Magick Lossless Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.3 ms | 11.99 ms | 0.66 ms | - | - | - | 16 KB |
| 'Magick Lossless Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 145.9 ms | 8.55 ms | 0.47 ms | - | - | - | 18.05 KB | | 'ImageSharp Lossless Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 280.2 ms | 6.21 ms | 0.34 ms | - | - | - | 2,092 KB |
| 'ImageSharp Lossless Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 1,694.1 ms | 55.09 ms | 3.02 ms | 4000.0000 | 1000.0000 | - | 30556.87 KB | | 'Magick Lossy Webp' | Job-ALQPDS | .NET Core 3.1 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.2 ms | 9.32 ms | 0.51 ms | - | - | - | 15 KB |
| 'Magick Lossy Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.7 ms | 1.89 ms | 0.10 ms | - | - | - | 15.75 KB | | 'ImageSharp Lossy Webp' | Job-ALQPDS | .NET Core 3.1 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 295.8 ms | 21.25 ms | 1.16 ms | 500.0000 | - | - | 2,427 KB |
| 'ImageSharp Lossy Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 741.6 ms | 21.45 ms | 1.18 ms | - | - | - | 2767.85 KB | | 'Magick Lossless Webp' | Job-ALQPDS | .NET Core 3.1 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.5 ms | 4.07 ms | 0.22 ms | - | - | - | 15 KB |
| 'Magick Lossless Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 146.1 ms | 9.52 ms | 0.52 ms | - | - | - | 16.54 KB | | 'ImageSharp Lossless Webp' | Job-ALQPDS | .NET Core 3.1 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 464.0 ms | 55.70 ms | 3.05 ms | - | - | - | 2,090 KB |
| 'ImageSharp Lossless Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 522.5 ms | 21.15 ms | 1.16 ms | 4000.0000 | 1000.0000 | - | 22860.02 KB | | 'Magick Lossy Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 108.0 ms | 29.60 ms | 1.62 ms | - | - | - | 32 KB |
| 'Magick Lossy Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.9 ms | 5.34 ms | 0.29 ms | - | - | - | 15.45 KB | | 'ImageSharp Lossy Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 564.9 ms | 29.69 ms | 1.63 ms | - | - | - | 2,436 KB |
| 'ImageSharp Lossy Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 748.8 ms | 290.47 ms | 15.92 ms | - | - | - | 2767.84 KB | | 'Magick Lossless Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.2 ms | 4.74 ms | 0.26 ms | - | - | - | 18 KB |
| 'Magick Lossless Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 146.1 ms | 1.14 ms | 0.06 ms | - | - | - | 15.9 KB | | 'ImageSharp Lossless Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 1,767.5 ms | 106.33 ms | 5.83 ms | - | - | - | 9,729 KB |
| 'ImageSharp Lossless Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 480.7 ms | 25.25 ms | 1.38 ms | 4000.0000 | 1000.0000 | - | 22859.7 KB |
| 'Magick Lossy Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.7 ms | 4.71 ms | 0.26 ms | - | - | - | 15.48 KB |
| 'ImageSharp Lossy Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 645.7 ms | 61.00 ms | 3.34 ms | - | - | - | 2768.13 KB |
| 'Magick Lossless Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 146.5 ms | 18.63 ms | 1.02 ms | - | - | - | 15.8 KB |
| 'ImageSharp Lossless Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 306.7 ms | 32.31 ms | 1.77 ms | 4000.0000 | 1000.0000 | - | 22860.02 KB |
*/ */
} }
} }

2
tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs

@ -85,7 +85,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs
public void PngCoreWuNoDither() public void PngCoreWuNoDither()
{ {
using var memoryStream = new MemoryStream(); using var memoryStream = new MemoryStream();
var options = new PngEncoder { Quantizer = new WuQuantizer(new QuantizerOptions { Dither = null }) }; var options = new PngEncoder { Quantizer = new WuQuantizer(new QuantizerOptions { Dither = null }), ColorType = PngColorType.Palette };
this.bmpCore.SaveAsPng(memoryStream, options); this.bmpCore.SaveAsPng(memoryStream, options);
} }
} }

101
tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs

@ -4,6 +4,7 @@
using System.IO; using System.IO;
using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Attributes;
using ImageMagick; using ImageMagick;
using ImageMagick.Formats;
using SixLabors.ImageSharp.Formats.Webp; using SixLabors.ImageSharp.Formats.Webp;
using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Tests; using SixLabors.ImageSharp.Tests;
@ -44,8 +45,22 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs
public void MagickWebpLossy() public void MagickWebpLossy()
{ {
using var memoryStream = new MemoryStream(); using var memoryStream = new MemoryStream();
this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "lossless", false);
this.webpMagick.Write(memoryStream, MagickFormat.WebP); var defines = new WebPWriteDefines
{
Lossless = false,
Method = 4,
AlphaCompression = WebPAlphaCompression.None,
FilterStrength = 60,
SnsStrength = 50,
Pass = 1,
// 100 means off.
NearLossless = 100
};
this.webpMagick.Quality = 75;
this.webpMagick.Write(memoryStream, defines);
} }
[Benchmark(Description = "ImageSharp Webp Lossy")] [Benchmark(Description = "ImageSharp Webp Lossy")]
@ -54,7 +69,12 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs
using var memoryStream = new MemoryStream(); using var memoryStream = new MemoryStream();
this.webp.Save(memoryStream, new WebpEncoder() this.webp.Save(memoryStream, new WebpEncoder()
{ {
FileFormat = WebpFileFormatType.Lossy FileFormat = WebpFileFormatType.Lossy,
Method = WebpEncodingMethod.Level4,
UseAlphaCompression = false,
FilterStrength = 60,
SpatialNoiseShaping = 50,
EntropyPasses = 1
}); });
} }
@ -62,8 +82,17 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs
public void MagickWebpLossless() public void MagickWebpLossless()
{ {
using var memoryStream = new MemoryStream(); using var memoryStream = new MemoryStream();
this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "lossless", true); var defines = new WebPWriteDefines
this.webpMagick.Write(memoryStream, MagickFormat.WebP); {
Lossless = true,
Method = 4,
// 100 means off.
NearLossless = 100
};
this.webpMagick.Quality = 75;
this.webpMagick.Write(memoryStream, defines);
} }
[Benchmark(Description = "ImageSharp Webp Lossless")] [Benchmark(Description = "ImageSharp Webp Lossless")]
@ -72,41 +101,43 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs
using var memoryStream = new MemoryStream(); using var memoryStream = new MemoryStream();
this.webp.Save(memoryStream, new WebpEncoder() this.webp.Save(memoryStream, new WebpEncoder()
{ {
FileFormat = WebpFileFormatType.Lossless FileFormat = WebpFileFormatType.Lossless,
Method = WebpEncodingMethod.Level4,
NearLossless = false,
// This is equal to exact = false in libwebp, which is the default.
TransparentColorMode = WebpTransparentColorMode.Clear
}); });
} }
/* Results 17.06.2021 /* Results 04.11.2021
* Summary * * Summary *
BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.630 (2004/?/20H1) BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19043.1320 (21H1/May2021Update)
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=5.0.100 .NET SDK=6.0.100-rc.2.21505.57
[Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT [Host] : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT
Job-OUUGWL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT Job-WQLXJO : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT
Job-GAIITM : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT Job-OJJAMD : .NET Core 3.1.20 (CoreCLR 4.700.21.47003, CoreFX 4.700.21.47101), X64 RyuJIT
Job-HWOBSO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT Job-OMFOAS : .NET Framework 4.8 (4.8.4420.0), X64 RyuJIT
| Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | IterationCount=3 LaunchCount=1 WarmupCount=3
|--------------------------- |----------- |-------------- |------------- |----------:|-----------:|----------:|------:|--------:|-----------:|----------:|----------:|-------------:|
| 'Magick Webp Lossy' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 23.30 ms | 0.869 ms | 0.048 ms | 0.14 | 0.00 | - | - | - | 68.19 KB | | Method | Job | Runtime | Arguments | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
| 'ImageSharp Webp Lossy' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 68.22 ms | 16.454 ms | 0.902 ms | 0.42 | 0.01 | 6125.0000 | 125.0000 | - | 26359.49 KB | |--------------------------- |----------- |--------------------- |---------------------- |------------- |----------:|----------:|---------:|------:|--------:|------------:|----------:|----------:|-----------:|
| 'Magick Webp Lossless' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 161.96 ms | 9.879 ms | 0.541 ms | 1.00 | 0.00 | - | - | - | 520.28 KB | | 'Magick Webp Lossy' | Job-WQLXJO | .NET 5.0 | /p:DebugType=portable | Png/Bike.png | 23.33 ms | 1.491 ms | 0.082 ms | 0.15 | 0.00 | - | - | - | 67 KB |
| 'ImageSharp Webp Lossless' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 370.88 ms | 58.875 ms | 3.227 ms | 2.29 | 0.02 | 34000.0000 | 5000.0000 | 2000.0000 | 163177.15 KB | | 'ImageSharp Webp Lossy' | Job-WQLXJO | .NET 5.0 | /p:DebugType=portable | Png/Bike.png | 245.80 ms | 24.288 ms | 1.331 ms | 1.53 | 0.01 | 135000.0000 | - | - | 552,713 KB |
| | | | | | | | | | | | | | | 'Magick Webp Lossless' | Job-WQLXJO | .NET 5.0 | /p:DebugType=portable | Png/Bike.png | 160.36 ms | 11.131 ms | 0.610 ms | 1.00 | 0.00 | - | - | - | 518 KB |
| 'Magick Webp Lossy' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 23.35 ms | 0.428 ms | 0.023 ms | 0.14 | 0.00 | - | - | - | 67.76 KB | | 'ImageSharp Webp Lossless' | Job-WQLXJO | .NET 5.0 | /p:DebugType=portable | Png/Bike.png | 313.93 ms | 45.605 ms | 2.500 ms | 1.96 | 0.01 | 34000.0000 | 5000.0000 | 2000.0000 | 161,670 KB |
| 'ImageSharp Webp Lossy' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 43.95 ms | 2.850 ms | 0.156 ms | 0.27 | 0.00 | 6250.0000 | 250.0000 | 83.3333 | 26284.72 KB | | | | | | | | | | | | | | | |
| 'Magick Webp Lossless' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 161.44 ms | 3.749 ms | 0.206 ms | 1.00 | 0.00 | - | - | - | 519.26 KB | | 'Magick Webp Lossy' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 23.36 ms | 2.289 ms | 0.125 ms | 0.15 | 0.00 | - | - | - | 67 KB |
| 'ImageSharp Webp Lossless' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 335.78 ms | 78.666 ms | 4.312 ms | 2.08 | 0.03 | 34000.0000 | 5000.0000 | 2000.0000 | 162727.56 KB | | 'ImageSharp Webp Lossy' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 254.64 ms | 19.620 ms | 1.075 ms | 1.59 | 0.00 | 135000.0000 | - | - | 552,713 KB |
| | | | | | | | | | | | | | | 'Magick Webp Lossless' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 160.30 ms | 9.549 ms | 0.523 ms | 1.00 | 0.00 | - | - | - | 518 KB |
| 'Magick Webp Lossy' | Job-VRDVKW | .NET Core 3.1 | Png/Bike.png | 23.48 ms | 4.325 ms | 0.237 ms | 0.15 | 0.00 | - | - | - | 67.66 KB | | 'ImageSharp Webp Lossless' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 320.35 ms | 22.924 ms | 1.257 ms | 2.00 | 0.01 | 34000.0000 | 5000.0000 | 2000.0000 | 161,669 KB |
| 'ImageSharp Webp Lossy' | Job-VRDVKW | .NET Core 3.1 | Png/Bike.png | 43.29 ms | 16.503 ms | 0.905 ms | 0.27 | 0.01 | 6272.7273 | 272.7273 | 90.9091 | 26284.86 KB | | | | | | | | | | | | | | | |
| 'Magick Webp Lossless' | Job-VRDVKW | .NET Core 3.1 | Png/Bike.png | 161.81 ms | 10.693 ms | 0.586 ms | 1.00 | 0.00 | - | - | - | 523.25 KB | | 'Magick Webp Lossy' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 23.37 ms | 0.908 ms | 0.050 ms | 0.15 | 0.00 | - | - | - | 68 KB |
| 'ImageSharp Webp Lossless' | Job-VRDVKW | .NET Core 3.1 | Png/Bike.png | 323.97 ms | 235.468 ms | 12.907 ms | 2.00 | 0.08 | 34000.0000 | 5000.0000 | 2000.0000 | 162724.84 KB | | 'ImageSharp Webp Lossy' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 378.67 ms | 25.540 ms | 1.400 ms | 2.36 | 0.01 | 135000.0000 | - | - | 554,351 KB |
| | | | | | | | | | | | | | | 'Magick Webp Lossless' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 160.13 ms | 5.115 ms | 0.280 ms | 1.00 | 0.00 | - | - | - | 520 KB |
| 'Magick Webp Lossy' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 23.36 ms | 0.448 ms | 0.025 ms | 0.14 | 0.00 | - | - | - | 67.66 KB | | 'ImageSharp Webp Lossless' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 379.01 ms | 71.192 ms | 3.902 ms | 2.37 | 0.02 | 34000.0000 | 5000.0000 | 2000.0000 | 162,119 KB |
| 'ImageSharp Webp Lossy' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 40.11 ms | 2.465 ms | 0.135 ms | 0.25 | 0.00 | 6307.6923 | 230.7692 | 76.9231 | 26284.71 KB |
| 'Magick Webp Lossless' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 161.55 ms | 6.662 ms | 0.365 ms | 1.00 | 0.00 | - | - | - | 518.84 KB |
| 'ImageSharp Webp Lossless' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 298.73 ms | 17.953 ms | 0.984 ms | 1.85 | 0.01 | 34000.0000 | 5000.0000 | 2000.0000 | 162725.13 KB |
*/ */
} }
} }

4
tests/ImageSharp.Benchmarks/Config.cs

@ -34,7 +34,7 @@ namespace SixLabors.ImageSharp.Benchmarks
public MultiFramework() => this.AddJob( public MultiFramework() => this.AddJob(
Job.Default.WithRuntime(ClrRuntime.Net472), Job.Default.WithRuntime(ClrRuntime.Net472),
Job.Default.WithRuntime(CoreRuntime.Core31), Job.Default.WithRuntime(CoreRuntime.Core31),
Job.Default.WithRuntime(CoreRuntime.Core50)); Job.Default.WithRuntime(CoreRuntime.Core50).With(new Argument[] { new MsBuildArgument("/p:DebugType=portable") }));
} }
public class ShortMultiFramework : Config public class ShortMultiFramework : Config
@ -42,7 +42,7 @@ namespace SixLabors.ImageSharp.Benchmarks
public ShortMultiFramework() => this.AddJob( public ShortMultiFramework() => this.AddJob(
Job.Default.WithRuntime(ClrRuntime.Net472).WithLaunchCount(1).WithWarmupCount(3).WithIterationCount(3), Job.Default.WithRuntime(ClrRuntime.Net472).WithLaunchCount(1).WithWarmupCount(3).WithIterationCount(3),
Job.Default.WithRuntime(CoreRuntime.Core31).WithLaunchCount(1).WithWarmupCount(3).WithIterationCount(3), Job.Default.WithRuntime(CoreRuntime.Core31).WithLaunchCount(1).WithWarmupCount(3).WithIterationCount(3),
Job.Default.WithRuntime(CoreRuntime.Core50).WithLaunchCount(1).WithWarmupCount(3).WithIterationCount(3)); Job.Default.WithRuntime(CoreRuntime.Core50).WithLaunchCount(1).WithWarmupCount(3).WithIterationCount(3).With(new Argument[] { new MsBuildArgument("/p:DebugType=portable") }));
} }
public class ShortCore31 : Config public class ShortCore31 : Config

1
tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj

@ -6,6 +6,7 @@
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<RootNamespace>SixLabors.ImageSharp.Benchmarks</RootNamespace> <RootNamespace>SixLabors.ImageSharp.Benchmarks</RootNamespace>
<GenerateProgramFile>false</GenerateProgramFile> <GenerateProgramFile>false</GenerateProgramFile>
<DebugType>portable</DebugType>
<!--Used to hide test project from dotnet test--> <!--Used to hide test project from dotnet test-->
<IsTestProject>false</IsTestProject> <IsTestProject>false</IsTestProject>
<Configurations>Debug;Release;Debug-InnerLoop;Release-InnerLoop</Configurations> <Configurations>Debug;Release;Debug-InnerLoop;Release-InnerLoop</Configurations>

7
tests/ImageSharp.Benchmarks/benchmark.sh

@ -1,7 +0,0 @@
#!/bin/bash
# Build in release mode
dotnet build -c Release -f netcoreapp2.0
# Run benchmarks
dotnet bin/Release/netcoreapp2.0/ImageSharp.Benchmarks.dll

26
tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs

@ -66,7 +66,7 @@ namespace SixLabors.ImageSharp.Tests
[Fact] [Fact]
public void Rgb24() public void Rgb24()
{ {
var source = new Rgb24(1, 22, 231); var source = new Rgb24(1, 22, 231);
// Act: // Act:
var color = new Color(source); var color = new Color(source);
@ -79,7 +79,7 @@ namespace SixLabors.ImageSharp.Tests
[Fact] [Fact]
public void Bgr24() public void Bgr24()
{ {
var source = new Bgr24(1, 22, 231); var source = new Bgr24(1, 22, 231);
// Act: // Act:
var color = new Color(source); var color = new Color(source);
@ -88,6 +88,28 @@ namespace SixLabors.ImageSharp.Tests
Bgr24 data = color; Bgr24 data = color;
Assert.Equal(source, data); Assert.Equal(source, data);
} }
[Fact]
public void GenericPixel()
{
    // Every pixel format below is handed to AssertGenericPixel, which converts it
    // to a Color and back and requires the value to come back equal.
    var vector = new RgbaVector(float.Epsilon, 2 * float.Epsilon, float.MaxValue, float.MinValue);
    AssertGenericPixel(vector);

    var rgba64 = new Rgba64(1, 2, ushort.MaxValue, ushort.MaxValue - 1);
    AssertGenericPixel(rgba64);

    var rgb48 = new Rgb48(1, 2, ushort.MaxValue - 1);
    AssertGenericPixel(rgb48);

    var la32 = new La32(1, ushort.MaxValue - 1);
    AssertGenericPixel(la32);

    var l16 = new L16(ushort.MaxValue - 1);
    AssertGenericPixel(l16);

    var rgba32 = new Rgba32(1, 2, 255, 254);
    AssertGenericPixel(rgba32);
}
/// <summary>
/// Converts <paramref name="source"/> to a <see cref="Color"/> via
/// <c>Color.FromPixel</c>, converts it back with <c>ToPixel</c>, and asserts
/// that the result equals the input.
/// </summary>
/// <typeparam name="TPixel">The pixel format being round-tripped.</typeparam>
/// <param name="source">The pixel value to round-trip.</param>
private static void AssertGenericPixel<TPixel>(TPixel source)
    where TPixel : unmanaged, IPixel<TPixel>
{
    // Act:
    var wrapped = Color.FromPixel(source);
    TPixel roundTripped = wrapped.ToPixel<TPixel>();

    // Assert:
    Assert.Equal(source, roundTripped);
}
} }
} }
} }

92
tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs

@ -0,0 +1,92 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using SixLabors.ImageSharp.Formats.Webp.Lossless;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP
{
[Trait("Format", "Webp")]
public class ColorSpaceTransformUtilsTests
{
    /// <summary>
    /// Calls <c>ColorSpaceTransformUtils.CollectColorBlueTransforms</c> on a fixed
    /// 32-value input and checks the histogram it fills.
    /// </summary>
    private static void RunCollectColorBlueTransformsTest()
    {
        uint[] input =
        {
            3074, 256, 256, 256, 0, 65280, 65280, 65280, 256, 256, 0, 256, 0, 65280, 0, 65280, 16711680, 256,
            256, 0, 65024, 0, 256, 256, 0, 65280, 0, 65280, 0, 256, 0, 256
        };

        // Expected histogram: 31 entries in bin 0, one entry in bin 2, every other bin empty.
        int[] expected = new int[256];
        expected[0] = 31;
        expected[2] = 1;

        int[] actual = new int[256];
        ColorSpaceTransformUtils.CollectColorBlueTransforms(input, 0, 32, 1, 0, 0, actual);

        Assert.Equal(expected, actual);
    }

    /// <summary>
    /// Calls <c>ColorSpaceTransformUtils.CollectColorRedTransforms</c> on the same
    /// fixed 32-value input and checks the histogram it fills.
    /// </summary>
    private static void RunCollectColorRedTransformsTest()
    {
        uint[] input =
        {
            3074, 256, 256, 256, 0, 65280, 65280, 65280, 256, 256, 0, 256, 0, 65280, 0, 65280, 16711680, 256,
            256, 0, 65024, 0, 256, 256, 0, 65280, 0, 65280, 0, 256, 0, 256
        };

        // Expected histogram: 31 entries in bin 0, one entry in the final bin, every other bin empty.
        int[] expected = new int[256];
        expected[0] = 31;
        expected[expected.Length - 1] = 1;

        int[] actual = new int[256];
        ColorSpaceTransformUtils.CollectColorRedTransforms(input, 0, 32, 1, 0, actual);

        Assert.Equal(expected, actual);
    }

    // Baseline runs: the scenarios are invoked directly, with no hardware-intrinsics override.
    [Fact]
    public void CollectColorBlueTransforms_Works() => RunCollectColorBlueTransformsTest();

    [Fact]
    public void CollectColorRedTransforms_Works() => RunCollectColorRedTransformsTest();

#if SUPPORTS_RUNTIME_INTRINSICS
    // The same scenarios handed to FeatureTestRunner with different HwIntrinsics settings.
    [Fact]
    public void CollectColorBlueTransforms_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorBlueTransformsTest, HwIntrinsics.AllowAll);

    [Fact]
    public void CollectColorBlueTransforms_WithoutSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorBlueTransformsTest, HwIntrinsics.DisableSSE41);

    [Fact]
    public void CollectColorBlueTransforms_WithoutAvx2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorBlueTransformsTest, HwIntrinsics.DisableAVX2);

    [Fact]
    public void CollectColorRedTransforms_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorRedTransformsTest, HwIntrinsics.AllowAll);

    [Fact]
    public void CollectColorRedTransforms_WithoutSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorRedTransformsTest, HwIntrinsics.DisableSSE41);

    [Fact]
    public void CollectColorRedTransforms_WithoutAvx2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorRedTransformsTest, HwIntrinsics.DisableAVX2);
#endif
}
}

94
tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs

@ -132,6 +132,76 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
Assert.Equal(expectedOutput, pixelData); Assert.Equal(expectedOutput, pixelData);
} }
/// <summary>
/// Calls <c>LosslessUtils.Predictor11</c> with a fixed left pixel and top row and
/// checks the predicted value.
/// </summary>
private static void RunPredictor11Test()
{
    // arrange
    uint[] upperRow = { 4278258949, 4278258949 };
    uint leftPixel = 4294839812;
    short[] scratchBuffer = new short[8];
    uint expected = 4294839812;

    unsafe
    {
        fixed (uint* rowStart = upperRow)
        {
            // The predictor receives a pointer to the second element, so the
            // element before it is addressable as well.
            uint* top = rowStart + 1;

            // act
            uint predicted = LosslessUtils.Predictor11(leftPixel, top, scratchBuffer);

            // assert
            Assert.Equal(expected, predicted);
        }
    }
}
/// <summary>
/// Calls <c>LosslessUtils.Predictor12</c> with a fixed left pixel and top row and
/// checks the predicted value.
/// </summary>
private static void RunPredictor12Test()
{
    // arrange
    uint[] upperRow = { 4294844413, 4294779388 };
    uint leftPixel = 4294844413;
    uint expected = 4294779388;

    unsafe
    {
        fixed (uint* rowStart = upperRow)
        {
            // The predictor receives a pointer to the second element, so the
            // element before it is addressable as well.
            uint* top = rowStart + 1;

            // act
            uint predicted = LosslessUtils.Predictor12(leftPixel, top);

            // assert
            Assert.Equal(expected, predicted);
        }
    }
}
/// <summary>
/// Calls <c>LosslessUtils.Predictor13</c> with a fixed left pixel and top row and
/// checks the predicted value.
/// </summary>
private static void RunPredictor13Test()
{
    // arrange
    uint[] upperRow = { 4278193922, 4278193666 };
    uint leftPixel = 4278193410;
    uint expected = 4278193154;

    unsafe
    {
        fixed (uint* rowStart = upperRow)
        {
            // The predictor receives a pointer to the second element, so the
            // element before it is addressable as well.
            uint* top = rowStart + 1;

            // act
            uint predicted = LosslessUtils.Predictor13(leftPixel, top);

            // assert
            Assert.Equal(expected, predicted);
        }
    }
}
// Baseline runs: each predictor scenario is invoked directly, with no
// hardware-intrinsics override.
[Fact]
public void Predictor11_Works() => RunPredictor11Test();
[Fact]
public void Predictor12_Works() => RunPredictor12Test();
[Fact]
public void Predictor13_Works() => RunPredictor13Test();
[Fact] [Fact]
public void SubtractGreen_Works() => RunSubtractGreenTest(); public void SubtractGreen_Works() => RunSubtractGreenTest();
@ -145,6 +215,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
public void TransformColorInverse_Works() => RunTransformColorInverseTest(); public void TransformColorInverse_Works() => RunTransformColorInverseTest();
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
// The predictor scenarios again, each handed to FeatureTestRunner twice:
// once with HwIntrinsics.AllowAll and once with HwIntrinsics.DisableSSE2.
[Fact]
public void Predictor11_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.AllowAll);
[Fact]
public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2);
[Fact]
public void Predictor12_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.AllowAll);
[Fact]
public void Predictor12_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.DisableSSE2);
[Fact]
public void Predictor13_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.AllowAll);
[Fact]
public void Predictor13_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.DisableSSE2);
[Fact] [Fact]
public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll); public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll);
@ -169,11 +257,17 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
[Fact] [Fact]
public void TransformColor_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.DisableSSE2); public void TransformColor_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.DisableSSE2);
// Runs the TransformColor scenario through FeatureTestRunner with HwIntrinsics.DisableAVX2.
[Fact]
public void TransformColor_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.DisableAVX2);
[Fact] [Fact]
public void TransformColorInverse_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorInverseTest, HwIntrinsics.AllowAll); public void TransformColorInverse_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorInverseTest, HwIntrinsics.AllowAll);
[Fact] [Fact]
public void TransformColorInverse_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorInverseTest, HwIntrinsics.DisableSSE2); public void TransformColorInverse_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorInverseTest, HwIntrinsics.DisableSSE2);
// Runs the TransformColorInverse scenario through FeatureTestRunner with HwIntrinsics.DisableAVX2.
[Fact]
public void TransformColorInverse_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorInverseTest, HwIntrinsics.DisableAVX2);
#endif #endif
} }
} }

122
tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs

@ -0,0 +1,122 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Linq;
using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP
{
[Trait("Format", "Webp")]
public class LossyUtilsTests
{
/// <summary>
/// Calls <c>LossyUtils.Vp8_Sse4X4</c> on two fixed byte buffers and checks the
/// value it returns.
/// </summary>
private static void RunVp8Sse4X4Test()
{
    byte[] first =
    {
        27, 27, 28, 29, 29, 28, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129,
        129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 29, 29, 28,
        28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 26,
        26, 26, 26, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128,
        128, 128, 128, 128, 128, 128, 128, 28, 27, 27, 26, 26, 27, 27, 28, 27, 28, 28, 29, 29, 28, 28, 27,
        129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128
    };
    byte[] second =
    {
        26, 26, 26, 26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 26, 26, 26, 26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
        28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 26, 26, 26,
        26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 26, 26, 26, 26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204
    };

    int result = LossyUtils.Vp8_Sse4X4(first, second);

    Assert.Equal(27, result);
}
/// <summary>
/// Calls <c>LossyUtils.Mean16x4</c> on a fixed byte buffer and checks the four
/// values it writes into the output array.
/// </summary>
private static void RunMean16x4Test()
{
    // arrange
    byte[] input =
    {
        154, 145, 102, 115, 127, 129, 126, 125, 126, 120, 133, 152, 157, 153, 119, 94, 104, 116, 111, 113,
        113, 109, 105, 124, 173, 175, 177, 170, 175, 172, 166, 164, 151, 141, 99, 114, 125, 126, 135, 150,
        133, 115, 127, 149, 141, 168, 100, 54, 110, 117, 115, 116, 119, 115, 117, 130, 174, 174, 174, 157,
        146, 171, 166, 158, 117, 140, 96, 111, 119, 119, 136, 171, 188, 134, 121, 126, 136, 119, 59, 77,
        109, 115, 113, 120, 120, 117, 128, 115, 174, 173, 173, 161, 152, 148, 153, 162, 105, 140, 96, 114,
        115, 122, 141, 173, 190, 190, 142, 106, 151, 78, 66, 141, 110, 117, 123, 136, 118, 124, 127, 114,
        173, 175, 166, 155, 155, 159, 159, 158
    };
    uint[] dc = new uint[4];
    uint[] expectedDc = { 1940, 2139, 2252, 1813 };

    // act
    LossyUtils.Mean16x4(input, dc);

    // assert
    // Assert.Equal compares the arrays element by element and reports the first
    // mismatch; Assert.True(SequenceEqual) would only report "expected True".
    Assert.Equal(expectedDc, dc);
}
/// <summary>
/// Calls <c>LossyUtils.Vp8Disto4X4</c> with two fixed byte blocks and a fixed weight table
/// and checks that the returned value is the known-good result.
/// </summary>
/// <remarks>
/// Static and parameterless so that it can be handed as a method group to
/// <c>FeatureTestRunner.RunWithHwIntrinsicsFeature</c> as well as called directly.
/// </remarks>
private static void RunHadamardTransformTest()
{
    // arrange
    const int expectedDistortion = 2;
    byte[] first =
    {
        27, 27, 28, 29, 29, 28, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129,
        129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 29, 29, 28,
        28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 26,
        26, 26, 26, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128,
        128, 128, 128, 128, 128, 128, 128, 28, 27, 27, 26, 26, 27, 27, 28, 27, 28, 28, 29, 29, 28, 28, 27
    };
    byte[] second =
    {
        28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
        28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 28, 28, 28,
        28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28
    };
    ushort[] weights = { 38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2 };

    // The fourth argument is a caller-supplied 16-element int buffer; a fresh one is used here.
    int[] scratch = new int[16];

    // act
    int distortion = LossyUtils.Vp8Disto4X4(first, second, weights, scratch);

    // assert
    Assert.Equal(expectedDistortion, distortion);
}
/// <summary>
/// Runs the Vp8Sse4X4 scenario directly in the test process.
/// </summary>
[Fact]
public void Vp8Sse4X4_Works()
{
    RunVp8Sse4X4Test();
}

/// <summary>
/// Runs the Mean16x4 scenario directly in the test process.
/// </summary>
[Fact]
public void Mean16x4_Works()
{
    RunMean16x4Test();
}

/// <summary>
/// Runs the Hadamard transform (Vp8Disto4X4) scenario directly in the test process.
/// </summary>
[Fact]
public void HadamardTransform_Works()
{
    RunHadamardTransformTest();
}
#if SUPPORTS_RUNTIME_INTRINSICS
// Each scenario is executed twice through FeatureTestRunner: once with
// HwIntrinsics.AllowAll and once with HwIntrinsics.DisableHWIntrinsic.
// NOTE(review): presumably this exercises both the vectorized and the scalar
// code paths of LossyUtils - confirm against FeatureTestRunner.
[Fact]
public void Vp8Sse4X4_WithHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.AllowAll);
}

[Fact]
public void Vp8Sse4X4_WithoutHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.DisableHWIntrinsic);
}

[Fact]
public void Mean16x4_WithHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunMean16x4Test, HwIntrinsics.AllowAll);
}

[Fact]
public void Mean16x4_WithoutHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunMean16x4Test, HwIntrinsics.DisableHWIntrinsic);
}

[Fact]
public void HadamardTransform_WithHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.AllowAll);
}

[Fact]
public void HadamardTransform_WithoutHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableHWIntrinsic);
}
#endif
}
}

12
tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs

@ -40,8 +40,13 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
[Fact] [Fact]
public void ColorSpaceTransform_WithBikeImage_WithoutSSE41_Works() public void ColorSpaceTransform_WithBikeImage_WithoutSSE41_Works()
=> FeatureTestRunner.RunWithHwIntrinsicsFeature(ColorSpaceTransform_WithBikeImage_ProducesExpectedData, HwIntrinsics.DisableSSE41); => FeatureTestRunner.RunWithHwIntrinsicsFeature(ColorSpaceTransform_WithBikeImage_ProducesExpectedData, HwIntrinsics.DisableSSE41);
[Fact]
public void ColorSpaceTransform_WithBikeImage_WithoutAvx2_Works()
=> FeatureTestRunner.RunWithHwIntrinsicsFeature(ColorSpaceTransform_WithBikeImage_ProducesExpectedData, HwIntrinsics.DisableAVX2);
#endif #endif
// Test image: Input\Webp\peak.png
private static void RunColorSpaceTransformTestWithPeakImage() private static void RunColorSpaceTransformTestWithPeakImage()
{ {
// arrange // arrange
@ -90,14 +95,16 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits); int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits);
int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits);
uint[] transformData = new uint[transformWidth * transformHeight]; uint[] transformData = new uint[transformWidth * transformHeight];
int[] scratch = new int[256];
// act // act
PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData); PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData, scratch);
// assert // assert
Assert.Equal(expectedData, transformData); Assert.Equal(expectedData, transformData);
} }
// Test image: Input\Png\Bike.png
private static void RunColorSpaceTransformTestWithBikeImage() private static void RunColorSpaceTransformTestWithBikeImage()
{ {
// arrange // arrange
@ -119,9 +126,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits); int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits);
int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits);
uint[] transformData = new uint[transformWidth * transformHeight]; uint[] transformData = new uint[transformWidth * transformHeight];
int[] scratch = new int[256];
// act // act
PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData); PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData, scratch);
// assert // assert
Assert.Equal(expectedData, transformData); Assert.Equal(expectedData, transformData);

53
tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs

@ -0,0 +1,53 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Linq;
using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP
{
/// <summary>
/// Tests for <c>QuantEnc.QuantizeBlock</c>, run directly and, when runtime intrinsics
/// are supported, with hardware intrinsics allowed and disabled.
/// </summary>
[Trait("Format", "Webp")]
public class QuantEncTests
{
    /// <summary>
    /// Quantizes a fixed 16-coefficient block with a fixed matrix and checks both the
    /// written output block and the returned value against known-good results.
    /// </summary>
    /// <remarks>
    /// Static and parameterless so that it can be handed as a method group to
    /// <c>FeatureTestRunner.RunWithHwIntrinsicsFeature</c>.
    /// NOTE(review): the <c>unsafe</c> modifier is presumably required because the
    /// <c>Vp8Matrix</c> members indexed below are fixed-size buffers - confirm.
    /// </remarks>
    private static unsafe void RunQuantizeBlockTest()
    {
        // arrange
        short[] input = { 378, 777, -851, 888, 259, 148, 0, -111, -185, -185, -74, -37, 148, 74, 111, 74 };
        short[] output = new short[16];
        ushort[] q = { 42, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37 };
        ushort[] iq = { 3120, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542 };
        uint[] bias = { 49152, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296 };
        uint[] zthresh = { 26, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21 };
        short[] expectedOutput = { 9, 21, 7, -5, 4, -23, 24, 0, -5, 4, 2, -2, -3, -1, 3, 2 };
        int expectedResult = 1;

        // Copy the per-coefficient tables into the matrix passed to the quantizer.
        Vp8Matrix vp8Matrix = default;
        for (int i = 0; i < 16; i++)
        {
            vp8Matrix.Q[i] = q[i];
            vp8Matrix.IQ[i] = iq[i];
            vp8Matrix.Bias[i] = bias[i];
            vp8Matrix.ZThresh[i] = zthresh[i];
        }

        // act
        int actualResult = QuantEnc.QuantizeBlock(input, output, ref vp8Matrix);

        // assert
        // Compare the sequences directly instead of asserting on a bool: on a mismatch the
        // failure message then shows the expected and actual contents rather than just "False".
        Assert.Equal(expectedOutput, output);
        Assert.Equal(expectedResult, actualResult);
    }

    [Fact]
    public void QuantizeBlock_Works() => RunQuantizeBlockTest();

#if SUPPORTS_RUNTIME_INTRINSICS
    [Fact]
    public void QuantizeBlock_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.AllowAll);

    [Fact]
    public void QuantizeBlock_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableHWIntrinsic);
#endif
}
}

98
tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs

@ -0,0 +1,98 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Linq;
using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP
{
/// <summary>
/// Tests for <c>Vp8Encoding.ITransformOne</c> and <c>Vp8Encoding.ITransform</c>, run directly
/// and, when runtime intrinsics are supported, with hardware intrinsics allowed and disabled.
/// </summary>
[Trait("Format", "Webp")]
public class Vp8EncodingTests
{
    /// <summary>
    /// Builds the reference block shared by both inverse-transform tests.
    /// A new array is returned on every call so no test can observe another test's writes.
    /// </summary>
    private static byte[] CreateReference() => new byte[]
    {
        128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
        129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128,
        128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
        129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
        129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128,
        128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129,
        129, 129, 129, 129, 129, 129, 129, 129
    };

    /// <summary>
    /// Builds the 32 input coefficients shared by both inverse-transform tests.
    /// A new array is returned on every call.
    /// </summary>
    private static short[] CreateCoefficients() => new short[]
    {
        1, 216, -48, 0, 96, -24, -48, 24, 0, -24, 24, 0, 0, 0, 0, 0, 38, -240, -72, -24, 0, -24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    /// <summary>
    /// Runs <c>Vp8Encoding.ITransformOne</c> on the shared fixture and checks the destination
    /// buffer against known-good output.
    /// </summary>
    /// <remarks>
    /// Static and parameterless so that it can be handed as a method group to
    /// <c>FeatureTestRunner.RunWithHwIntrinsicsFeature</c>.
    /// </remarks>
    private static void RunOneInverseTransformTest()
    {
        // arrange
        byte[] reference = CreateReference();
        short[] input = CreateCoefficients();
        byte[] dst = new byte[128];
        byte[] expected =
        {
            161, 160, 149, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 160, 160, 133, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 156, 147, 109, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 152, 128, 87, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0
        };
        int[] scratch = new int[16];

        // act
        Vp8Encoding.ITransformOne(reference, input, dst, scratch);

        // assert
        // Compare the sequences directly instead of asserting on a bool: on a mismatch the
        // failure message then shows the expected and actual contents rather than just "False".
        Assert.Equal(expected, dst);
    }

    /// <summary>
    /// Runs <c>Vp8Encoding.ITransform</c> on the shared fixture and checks the destination
    /// buffer against known-good output.
    /// </summary>
    /// <remarks>
    /// Static and parameterless so that it can be handed as a method group to
    /// <c>FeatureTestRunner.RunWithHwIntrinsicsFeature</c>.
    /// </remarks>
    private static void RunTwoInverseTransformTest()
    {
        // arrange
        byte[] reference = CreateReference();
        short[] input = CreateCoefficients();
        byte[] dst = new byte[128];
        byte[] expected =
        {
            161, 160, 149, 105, 78, 127, 156, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 160, 160, 133, 85, 81, 129, 155, 167, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 156, 147, 109, 76, 85, 130, 153, 163, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 152, 128, 87, 83, 88, 132, 152, 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        };
        int[] scratch = new int[16];

        // act
        Vp8Encoding.ITransform(reference, input, dst, scratch);

        // assert
        // Sequence comparison for a readable failure message (see the one-transform test).
        Assert.Equal(expected, dst);
    }

    [Fact]
    public void OneInverseTransform_Works() => RunOneInverseTransformTest();

    [Fact]
    public void TwoInverseTransform_Works() => RunTwoInverseTransformTest();

#if SUPPORTS_RUNTIME_INTRINSICS
    [Fact]
    public void OneInverseTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunOneInverseTransformTest, HwIntrinsics.AllowAll);

    [Fact]
    public void OneInverseTransform_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunOneInverseTransformTest, HwIntrinsics.DisableHWIntrinsic);

    [Fact]
    public void TwoInverseTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTwoInverseTransformTest, HwIntrinsics.AllowAll);

    [Fact]
    public void TwoInverseTransform_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTwoInverseTransformTest, HwIntrinsics.DisableHWIntrinsic);
#endif
}
}

25
tests/ImageSharp.Tests/Formats/WebP/WebpMetaDataTests.cs

@ -63,6 +63,31 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
} }
} }
/// <summary>
/// Saves a blank 25x25 image carrying an EXIF profile with a single Software tag as WebP,
/// reloads it, and checks that the profile survives the round trip.
/// </summary>
/// <param name="fileFormatType">The WebP format (lossy or lossless) to encode with.</param>
/// <remarks>
/// NOTE(review): the test name suggests this profile is chosen so that the encoder has to
/// pad the EXIF chunk; the code here only verifies the round trip - confirm against the encoder.
/// </remarks>
[Theory]
[InlineData(WebpFileFormatType.Lossy)]
[InlineData(WebpFileFormatType.Lossless)]
public void Encode_WritesExifWithPadding(WebpFileFormatType fileFormatType)
{
    // arrange
    const string softwareName = "ImageSharp";
    var writtenProfile = new ExifProfile();
    writtenProfile.SetValue(ExifTag.Software, softwareName);

    using var source = new Image<Rgba32>(25, 25);
    source.Metadata.ExifProfile = writtenProfile;

    using var encoded = new MemoryStream();
    var encoder = new WebpEncoder() { FileFormat = fileFormatType };

    // act
    source.Save(encoded, encoder);

    // Rewind so the decoder reads from the start of what was just written.
    encoded.Position = 0;

    // assert
    using var decoded = Image.Load<Rgba32>(encoded);
    ExifProfile readProfile = decoded.Metadata.ExifProfile;
    Assert.NotNull(readProfile);
    Assert.Equal(writtenProfile.Values.Count, readProfile.Values.Count);
    Assert.Equal(softwareName, readProfile.GetValue(ExifTag.Software).Value);
}
[Theory] [Theory]
[WithFile(TestImages.Webp.Lossy.WithExif, PixelTypes.Rgba32)] [WithFile(TestImages.Webp.Lossy.WithExif, PixelTypes.Rgba32)]
public void EncodeLossyWebp_PreservesExif<TPixel>(TestImageProvider<TPixel> provider) public void EncodeLossyWebp_PreservesExif<TPixel>(TestImageProvider<TPixel> provider)

Loading…
Cancel
Save