diff --git a/ImageSharp.sln b/ImageSharp.sln
index c188d93150..f16f98ac59 100644
--- a/ImageSharp.sln
+++ b/ImageSharp.sln
@@ -1,7 +1,7 @@
Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 16
-VisualStudioVersion = 16.0.28902.138
+# Visual Studio Version 17
+VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "_root", "_root", "{C317F1B1-D75E-4C6D-83EB-80367343E0D7}"
ProjectSection(SolutionItems) = preProject
@@ -13,6 +13,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "_root", "_root", "{C317F1B1
ci-build.ps1 = ci-build.ps1
ci-pack.ps1 = ci-pack.ps1
ci-test.ps1 = ci-test.ps1
+ codecov.yml = codecov.yml
Directory.Build.props = Directory.Build.props
Directory.Build.targets = Directory.Build.targets
LICENSE = LICENSE
diff --git a/codecov.yml b/codecov.yml
index 833fc0a51a..310eefb8c2 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -9,3 +9,14 @@ codecov:
# Avoid Report Expired
# https://docs.codecov.io/docs/codecov-yaml#section-expired-reports
max_report_age: off
+
+coverage:
+ # Use integer precision
+ # https://docs.codecov.com/docs/codecovyml-reference#coverageprecision
+ precision: 0
+
+ # Explicitly control coverage status checks
+ # https://docs.codecov.com/docs/commit-status#disabling-a-status
+ status:
+ project: on
+ patch: off
diff --git a/shared-infrastructure b/shared-infrastructure
index a042aba176..33cb12ca77 160000
--- a/shared-infrastructure
+++ b/shared-infrastructure
@@ -1 +1 @@
-Subproject commit a042aba176cdb840d800c6ed4cfe41a54fb7b1e3
+Subproject commit 33cb12ca77f919b44de56f344d2627cc2a108c3a
diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs
index 0455fd26a4..bf7869e53d 100644
--- a/src/ImageSharp/Color/Color.Conversions.cs
+++ b/src/ImageSharp/Color/Color.Conversions.cs
@@ -17,56 +17,118 @@ namespace SixLabors.ImageSharp
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Rgba64 pixel) => this.data = pixel;
+ public Color(Rgba64 pixel)
+ {
+ this.data = pixel;
+ this.boxedHighPrecisionPixel = null;
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Color"/> struct.
+ /// </summary>
+ /// <param name="pixel">The <see cref="Rgb48"/> containing the color information.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public Color(Rgb48 pixel)
+ {
+ this.data = new Rgba64(pixel.R, pixel.G, pixel.B, ushort.MaxValue);
+ this.boxedHighPrecisionPixel = null;
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Color"/> struct.
+ /// </summary>
+ /// <param name="pixel">The <see cref="La32"/> containing the color information.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public Color(La32 pixel)
+ {
+ this.data = new Rgba64(pixel.L, pixel.L, pixel.L, pixel.A);
+ this.boxedHighPrecisionPixel = null;
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Color"/> struct.
+ /// </summary>
+ /// <param name="pixel">The <see cref="L16"/> containing the color information.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public Color(L16 pixel)
+ {
+ this.data = new Rgba64(pixel.PackedValue, pixel.PackedValue, pixel.PackedValue, ushort.MaxValue);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Rgba32 pixel) => this.data = new Rgba64(pixel);
+ public Color(Rgba32 pixel)
+ {
+ this.data = new Rgba64(pixel);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Argb32 pixel) => this.data = new Rgba64(pixel);
+ public Color(Argb32 pixel)
+ {
+ this.data = new Rgba64(pixel);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Bgra32 pixel) => this.data = new Rgba64(pixel);
+ public Color(Bgra32 pixel)
+ {
+ this.data = new Rgba64(pixel);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Rgb24 pixel) => this.data = new Rgba64(pixel);
+ public Color(Rgb24 pixel)
+ {
+ this.data = new Rgba64(pixel);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Bgr24 pixel) => this.data = new Rgba64(pixel);
+ public Color(Bgr24 pixel)
+ {
+ this.data = new Rgba64(pixel);
+ this.boxedHighPrecisionPixel = null;
+ }
///
/// Initializes a new instance of the struct.
///
/// The containing the color information.
[MethodImpl(InliningOptions.ShortMethod)]
- public Color(Vector4 vector) => this.data = new Rgba64(vector);
+ public Color(Vector4 vector)
+ {
+ vector = Numerics.Clamp(vector, Vector4.Zero, Vector4.One);
+ this.boxedHighPrecisionPixel = new RgbaVector(vector.X, vector.Y, vector.Z, vector.W);
+ this.data = default;
+ }
///
/// Converts a to .
///
/// The .
/// The .
- public static explicit operator Vector4(Color color) => color.data.ToVector4();
+ public static explicit operator Vector4(Color color) => color.ToVector4();
///
/// Converts an to .
@@ -74,24 +136,82 @@ namespace SixLabors.ImageSharp
/// The .
/// The .
[MethodImpl(InliningOptions.ShortMethod)]
- public static explicit operator Color(Vector4 source) => new Color(source);
+ public static explicit operator Color(Vector4 source) => new(source);
[MethodImpl(InliningOptions.ShortMethod)]
- internal Rgba32 ToRgba32() => this.data.ToRgba32();
+ internal Rgba32 ToRgba32()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToRgba32();
+ }
+
+ Rgba32 value = default;
+ this.boxedHighPrecisionPixel.ToRgba32(ref value);
+ return value;
+ }
[MethodImpl(InliningOptions.ShortMethod)]
- internal Bgra32 ToBgra32() => this.data.ToBgra32();
+ internal Bgra32 ToBgra32()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToBgra32();
+ }
+
+ Bgra32 value = default;
+ value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
+ return value;
+ }
[MethodImpl(InliningOptions.ShortMethod)]
- internal Argb32 ToArgb32() => this.data.ToArgb32();
+ internal Argb32 ToArgb32()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToArgb32();
+ }
+
+ Argb32 value = default;
+ value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
+ return value;
+ }
[MethodImpl(InliningOptions.ShortMethod)]
- internal Rgb24 ToRgb24() => this.data.ToRgb24();
+ internal Rgb24 ToRgb24()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToRgb24();
+ }
+
+ Rgb24 value = default;
+ value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
+ return value;
+ }
[MethodImpl(InliningOptions.ShortMethod)]
- internal Bgr24 ToBgr24() => this.data.ToBgr24();
+ internal Bgr24 ToBgr24()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToBgr24();
+ }
+
+ Bgr24 value = default;
+ value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
+ return value;
+ }
[MethodImpl(InliningOptions.ShortMethod)]
- internal Vector4 ToVector4() => this.data.ToVector4();
+ internal Vector4 ToVector4()
+ {
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.ToScaledVector4();
+ }
+
+ return this.boxedHighPrecisionPixel.ToScaledVector4();
+ }
}
}
diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs
index d5eedc160b..7c21d62ddf 100644
--- a/src/ImageSharp/Color/Color.cs
+++ b/src/ImageSharp/Color/Color.cs
@@ -4,7 +4,6 @@
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
@@ -21,6 +20,7 @@ namespace SixLabors.ImageSharp
public readonly partial struct Color : IEquatable
{
private readonly Rgba64 data;
+ private readonly IPixel boxedHighPrecisionPixel;
[MethodImpl(InliningOptions.ShortMethod)]
private Color(byte r, byte g, byte b, byte a)
@@ -30,6 +30,8 @@ namespace SixLabors.ImageSharp
ColorNumerics.UpscaleFrom8BitTo16Bit(g),
ColorNumerics.UpscaleFrom8BitTo16Bit(b),
ColorNumerics.UpscaleFrom8BitTo16Bit(a));
+
+ this.boxedHighPrecisionPixel = null;
}
[MethodImpl(InliningOptions.ShortMethod)]
@@ -40,6 +42,15 @@ namespace SixLabors.ImageSharp
ColorNumerics.UpscaleFrom8BitTo16Bit(g),
ColorNumerics.UpscaleFrom8BitTo16Bit(b),
ushort.MaxValue);
+
+ this.boxedHighPrecisionPixel = null;
+ }
+
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private Color(IPixel pixel)
+ {
+ this.boxedHighPrecisionPixel = pixel;
+ this.data = default;
}
///
@@ -52,13 +63,10 @@ namespace SixLabors.ImageSharp
/// otherwise, false.
///
[MethodImpl(InliningOptions.ShortMethod)]
- public static bool operator ==(Color left, Color right)
- {
- return left.Equals(right);
- }
+ public static bool operator ==(Color left, Color right) => left.Equals(right);
///
- /// Checks whether two structures are equal.
+ /// Checks whether two <see cref="Color"/> structures are not equal.
///
/// The left hand operand.
/// The right hand operand.
@@ -67,10 +75,7 @@ namespace SixLabors.ImageSharp
/// otherwise, false.
///
[MethodImpl(InliningOptions.ShortMethod)]
- public static bool operator !=(Color left, Color right)
- {
- return !left.Equals(right);
- }
+ public static bool operator !=(Color left, Color right) => !left.Equals(right);
///
/// Creates a from RGBA bytes.
@@ -81,7 +86,7 @@ namespace SixLabors.ImageSharp
/// The alpha component (0-255).
/// The .
[MethodImpl(InliningOptions.ShortMethod)]
- public static Color FromRgba(byte r, byte g, byte b, byte a) => new Color(r, g, b, a);
+ public static Color FromRgba(byte r, byte g, byte b, byte a) => new(r, g, b, a);
///
/// Creates a from RGB bytes.
@@ -91,7 +96,46 @@ namespace SixLabors.ImageSharp
/// The blue component (0-255).
/// The .
[MethodImpl(InliningOptions.ShortMethod)]
- public static Color FromRgb(byte r, byte g, byte b) => new Color(r, g, b);
+ public static Color FromRgb(byte r, byte g, byte b) => new(r, g, b);
+
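+ /// <summary>
+ /// Creates a <see cref="Color"/> from the given <typeparamref name="TPixel"/>.
+ /// </summary>
+ /// <param name="pixel">The pixel to convert from.</param>
+ /// <typeparam name="TPixel">The pixel format.</typeparam>
+ /// <returns>The <see cref="Color"/>.</returns>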
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static Color FromPixel(TPixel pixel)
+ where TPixel : unmanaged, IPixel
+ {
+ // Avoid boxing in case we can convert to Rgba64 safely and efficiently
+ if (typeof(TPixel) == typeof(Rgba64))
+ {
+ return new((Rgba64)(object)pixel);
+ }
+ else if (typeof(TPixel) == typeof(Rgb48))
+ {
+ return new((Rgb48)(object)pixel);
+ }
+ else if (typeof(TPixel) == typeof(La32))
+ {
+ return new((La32)(object)pixel);
+ }
+ else if (typeof(TPixel) == typeof(L16))
+ {
+ return new((L16)(object)pixel);
+ }
+ else if (Unsafe.SizeOf() <= Unsafe.SizeOf())
+ {
+ Rgba32 p = default;
+ pixel.ToRgba32(ref p);
+ return new(p);
+ }
+ else
+ {
+ return new(pixel);
+ }
+ }
///
/// Creates a new instance of the struct
@@ -213,7 +257,7 @@ namespace SixLabors.ImageSharp
public override string ToString() => this.ToHex();
///
- /// Converts the color instance to a specified type.
+ /// Converts the color instance to a specified <typeparamref name="TPixel"/> type.
///
/// The pixel type to convert to.
/// The pixel value.
@@ -221,13 +265,18 @@ namespace SixLabors.ImageSharp
public TPixel ToPixel()
where TPixel : unmanaged, IPixel
{
- TPixel pixel = default;
+ if (this.boxedHighPrecisionPixel is TPixel pixel)
+ {
+ return pixel;
+ }
+
+ pixel = default;
pixel.FromRgba64(this.data);
return pixel;
}
///
- /// Bulk converts a span of to a span of a specified type.
+ /// Bulk converts a span of <see cref="Color"/> to a span of a specified <typeparamref name="TPixel"/> type.
///
/// The pixel type to convert to.
/// The configuration.
@@ -240,28 +289,38 @@ namespace SixLabors.ImageSharp
Span destination)
where TPixel : unmanaged, IPixel
{
- ReadOnlySpan rgba64Span = MemoryMarshal.Cast(source);
- PixelOperations.Instance.FromRgba64(configuration, rgba64Span, destination);
+ Guard.DestinationShouldNotBeTooShort(source, destination, nameof(destination));
+ for (int i = 0; i < source.Length; i++)
+ {
+ destination[i] = source[i].ToPixel();
+ }
}
///
[MethodImpl(InliningOptions.ShortMethod)]
public bool Equals(Color other)
{
- return this.data.PackedValue == other.data.PackedValue;
+ if (this.boxedHighPrecisionPixel is null && other.boxedHighPrecisionPixel is null)
+ {
+ return this.data.PackedValue == other.data.PackedValue;
+ }
+
+ return this.boxedHighPrecisionPixel?.Equals(other.boxedHighPrecisionPixel) == true;
}
///
- public override bool Equals(object obj)
- {
- return obj is Color other && this.Equals(other);
- }
+ public override bool Equals(object obj) => obj is Color other && this.Equals(other);
///
[MethodImpl(InliningOptions.ShortMethod)]
public override int GetHashCode()
{
- return this.data.PackedValue.GetHashCode();
+ if (this.boxedHighPrecisionPixel is null)
+ {
+ return this.data.PackedValue.GetHashCode();
+ }
+
+ return this.boxedHighPrecisionPixel.GetHashCode();
}
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
index d9d42e0614..abe59516fa 100644
--- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
+++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
@@ -288,8 +288,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
/// The number of components to write.
private void WriteDefineHuffmanTables(int componentCount)
{
+ // This relies on a C# compiler optimization that refers to the static data segment of the assembly,
+ // and doesn't incur any allocation at all.
// Table identifiers.
- ReadOnlySpan headers = stackalloc byte[]
+ ReadOnlySpan headers = new byte[]
{
0x00,
0x10,
diff --git a/src/ImageSharp/Formats/Png/PngDecoderCore.cs b/src/ImageSharp/Formats/Png/PngDecoderCore.cs
index 987dc150c2..cf3cd7eb14 100644
--- a/src/ImageSharp/Formats/Png/PngDecoderCore.cs
+++ b/src/ImageSharp/Formats/Png/PngDecoderCore.cs
@@ -1071,7 +1071,7 @@ namespace SixLabors.ImageSharp.Formats.Png
int bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length);
while (bytesRead != 0)
{
- uncompressedBytes.AddRange(this.buffer.AsSpan().Slice(0, bytesRead).ToArray());
+ uncompressedBytes.AddRange(this.buffer.AsSpan(0, bytesRead).ToArray());
bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length);
}
diff --git a/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs b/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs
index baeabdbb20..d4d1d1cb65 100644
--- a/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs
+++ b/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs
@@ -256,8 +256,8 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Compressors
private void ResetTables()
{
- this.children.GetSpan().Fill(0);
- this.siblings.GetSpan().Fill(0);
+ this.children.GetSpan().Clear();
+ this.siblings.GetSpan().Clear();
this.bitsPerCode = MinBits;
this.maxCode = MaxValue(this.bitsPerCode);
this.nextValidCode = EoiCode + 1;
diff --git a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs
index e86418741d..972f4d8ff1 100644
--- a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs
+++ b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs
@@ -64,7 +64,7 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors
uint bitsWritten = 0;
for (int y = 0; y < height; y++)
{
- scanLine.Fill(0);
+ scanLine.Clear();
Decode2DScanline(bitReader, this.isWhiteZero, referenceScanLine, scanLine);
bitsWritten = this.WriteScanLine(buffer, scanLine, bitsWritten);
@@ -116,7 +116,15 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors
{
// If a TIFF reader encounters EOFB before the expected number of lines has been extracted,
// it is appropriate to assume that the missing rows consist entirely of white pixels.
- scanline.Fill(whiteIsZero ? (byte)0 : (byte)255);
+ if (whiteIsZero)
+ {
+ scanline.Clear();
+ }
+ else
+ {
+ scanline.Fill((byte)255);
+ }
+
break;
}
diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs
index abf44127a9..d6ceca5bf5 100644
--- a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs
+++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs
@@ -142,10 +142,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
[MethodImpl(InliningOptions.ShortMethod)]
public bool ReadBool() => this.ReadValue(1) is 1;
+ [MethodImpl(InliningOptions.ShortMethod)]
public uint ReadValue(int nBits)
{
- Guard.MustBeGreaterThan(nBits, 0, nameof(nBits));
- Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
+ DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
+ DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
uint v = 0;
while (nBits-- > 0)
@@ -156,10 +157,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
return v;
}
+ [MethodImpl(InliningOptions.ShortMethod)]
public int ReadSignedValue(int nBits)
{
- Guard.MustBeGreaterThan(nBits, 0, nameof(nBits));
- Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
+ DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
+ DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
int value = (int)this.ReadValue(nBits);
return this.ReadValue(1) != 0 ? -value : value;
diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs
index 601336fa4b..4df2feba81 100644
--- a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs
+++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs
@@ -28,7 +28,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
///
private const int Wbits = 32;
- private readonly uint[] bitMask =
+ private static readonly uint[] BitMask =
{
0,
0x000001, 0x000003, 0x000007, 0x00000f,
@@ -125,19 +125,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
///
/// The number of bits to read (should not exceed 16).
/// A ushort value.
+ [MethodImpl(InliningOptions.ShortMethod)]
public uint ReadValue(int nBits)
{
- Guard.MustBeGreaterThan(nBits, 0, nameof(nBits));
+ DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
if (!this.Eos && nBits <= Vp8LMaxNumBitRead)
{
- ulong val = this.PrefetchBits() & this.bitMask[nBits];
+ ulong val = this.PrefetchBits() & BitMask[nBits];
this.bitPos += nBits;
this.ShiftBytes();
return (uint)val;
}
- this.SetEndOfStream();
return 0;
}
@@ -169,6 +169,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
///
/// Advances the read buffer by 4 bytes to make room for reading next 32 bits.
///
+ [MethodImpl(InliningOptions.ShortMethod)]
public void FillBitWindow()
{
if (this.bitPos >= Wbits)
@@ -181,7 +182,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// Returns true if there was an attempt at reading bit past the end of the buffer.
///
/// True, if end of buffer was reached.
- public bool IsEndOfStream() => this.Eos || ((this.pos == this.len) && (this.bitPos > Lbits));
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public bool IsEndOfStream() => this.Eos || (this.pos == this.len && this.bitPos > Lbits);
[MethodImpl(InliningOptions.ShortMethod)]
private void DoFillBitWindow() => this.ShiftBytes();
@@ -189,6 +191,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
///
/// If not at EOS, reload up to Vp8LLbits byte-by-byte.
///
+ [MethodImpl(InliningOptions.ShortMethod)]
private void ShiftBytes()
{
System.Span dataSpan = this.Data.Memory.Span;
@@ -199,17 +202,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
++this.pos;
this.bitPos -= 8;
}
-
- if (this.IsEndOfStream())
- {
- this.SetEndOfStream();
- }
- }
-
- private void SetEndOfStream()
- {
- this.Eos = true;
- this.bitPos = 0; // To avoid undefined behaviour with shifts.
}
}
}
diff --git a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs
index 41623f2878..9208881360 100644
--- a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs
+++ b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs
@@ -10,11 +10,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
{
internal abstract class BitWriterBase
{
+ private const uint MaxDimension = 16777215;
+
+ private const ulong MaxCanvasPixels = 4294967295ul;
+
+ protected const uint ExtendedFileChunkSize = WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize;
+
///
/// Buffer to write to.
///
private byte[] buffer;
+ /// <summary>
+ /// A scratch buffer to reduce allocations.
+ /// </summary>
+ private readonly byte[] scratchBuffer = new byte[4];
+
///
/// Initializes a new instance of the class.
///
@@ -52,15 +63,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
///
public abstract void Finish();
- ///
- /// Writes the encoded image to the stream.
- ///
- /// The stream to write to.
- /// The exif profile.
- /// The width of the image.
- /// The height of the image.
- public abstract void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height);
-
protected void ResizeBuffer(int maxBytes, int sizeRequired)
{
int newSize = (3 * maxBytes) >> 1;
@@ -81,13 +83,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
/// The block length.
protected void WriteRiffHeader(Stream stream, uint riffSize)
{
- Span buf = stackalloc byte[4];
stream.Write(WebpConstants.RiffFourCc);
- BinaryPrimitives.WriteUInt32LittleEndian(buf, riffSize);
- stream.Write(buf);
+ BinaryPrimitives.WriteUInt32LittleEndian(this.scratchBuffer, riffSize);
+ stream.Write(this.scratchBuffer.AsSpan(0, 4));
stream.Write(WebpConstants.WebpHeader);
}
+ /// <summary>
+ /// Calculates the exif chunk size.
+ /// </summary>
+ /// <param name="exifBytes">The exif profile bytes.</param>
+ /// <returns>The exif chunk size in bytes.</returns>
+ protected uint ExifChunkSize(byte[] exifBytes)
+ {
+ uint exifSize = (uint)exifBytes.Length;
+ uint exifChunkSize = WebpConstants.ChunkHeaderSize + exifSize + (exifSize & 1);
+
+ return exifChunkSize;
+ }
+
///
/// Writes the Exif profile to the stream.
///
@@ -97,12 +111,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
{
DebugGuard.NotNull(exifBytes, nameof(exifBytes));
- Span buf = stackalloc byte[4];
+ uint size = (uint)exifBytes.Length;
+ Span buf = this.scratchBuffer.AsSpan(0, 4);
BinaryPrimitives.WriteUInt32BigEndian(buf, (uint)WebpChunkType.Exif);
stream.Write(buf);
- BinaryPrimitives.WriteUInt32LittleEndian(buf, (uint)exifBytes.Length);
+ BinaryPrimitives.WriteUInt32LittleEndian(buf, size);
stream.Write(buf);
stream.Write(exifBytes);
+
+ // Add padding byte if needed.
+ if ((size & 1) == 1)
+ {
+ stream.WriteByte(0);
+ }
}
///
@@ -112,16 +133,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
/// A exif profile or null, if it does not exist.
/// The width of the image.
/// The height of the image.
- protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint width, uint height)
+ /// <param name="hasAlpha">Flag indicating whether an alpha channel is present.</param>
+ protected void WriteVp8XHeader(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha)
{
- int maxDimension = 16777215;
- if (width > maxDimension || height > maxDimension)
+ if (width > MaxDimension || height > MaxDimension)
{
- WebpThrowHelper.ThrowInvalidImageDimensions($"Image width or height exceeds maximum allowed dimension of {maxDimension}");
+ WebpThrowHelper.ThrowInvalidImageDimensions($"Image width or height exceeds maximum allowed dimension of {MaxDimension}");
}
// The spec states that the product of Canvas Width and Canvas Height MUST be at most 2^32 - 1.
- if (width * height > 4294967295ul)
+ if (width * height > MaxCanvasPixels)
{
WebpThrowHelper.ThrowInvalidImageDimensions("The product of image width and height MUST be at most 2^32 - 1");
}
@@ -133,7 +154,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
flags |= 8;
}
- Span buf = stackalloc byte[4];
+ if (hasAlpha)
+ {
+ // Set alpha bit.
+ flags |= 16;
+ }
+
+ Span buf = this.scratchBuffer.AsSpan(0, 4);
stream.Write(WebpConstants.Vp8XMagicBytes);
BinaryPrimitives.WriteUInt32LittleEndian(buf, WebpConstants.Vp8XChunkSize);
stream.Write(buf);
diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs
index 7628247fd6..3b2f943db5 100644
--- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs
+++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs
@@ -399,8 +399,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
}
}
- ///
- public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height)
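+ /// <summary>
+ /// Writes the encoded image to the stream.
+ /// </summary>
+ /// <param name="stream">The stream to write to.</param>
+ /// <param name="exifProfile">The exif profile.</param>
+ /// <param name="width">The width of the image.</param>
+ /// <param name="height">The height of the image.</param>
+ /// <param name="hasAlpha">Flag indicating whether an alpha channel is present.</param>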
+ public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha)
{
bool isVp8X = false;
byte[] exifBytes = null;
@@ -408,9 +415,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
if (exifProfile != null)
{
isVp8X = true;
- riffSize += WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize;
+ riffSize += ExtendedFileChunkSize;
exifBytes = exifProfile.ToByteArray();
- riffSize += WebpConstants.ChunkHeaderSize + (uint)exifBytes.Length;
+ riffSize += this.ExifChunkSize(exifBytes);
}
this.Finish();
@@ -433,7 +440,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
riffSize += WebpConstants.TagSize + WebpConstants.ChunkHeaderSize + vp8Size;
// Emit headers and partition #0
- this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile);
+ this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile, hasAlpha);
bitWriterPartZero.WriteToStream(stream);
// Write the encoded image to the stream.
@@ -616,14 +623,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
while (it.Next());
}
- private void WriteWebpHeaders(Stream stream, uint size0, uint vp8Size, uint riffSize, bool isVp8X, uint width, uint height, ExifProfile exifProfile)
+ private void WriteWebpHeaders(Stream stream, uint size0, uint vp8Size, uint riffSize, bool isVp8X, uint width, uint height, ExifProfile exifProfile, bool hasAlpha)
{
this.WriteRiffHeader(stream, riffSize);
// Write VP8X, header if necessary.
if (isVp8X)
{
- this.WriteVp8XHeader(stream, exifProfile, width, height);
+ this.WriteVp8XHeader(stream, exifProfile, width, height, hasAlpha);
}
this.WriteVp8Header(stream, vp8Size);
diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs
index 2f942231fb..b83865aa36 100644
--- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs
+++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8LBitWriter.cs
@@ -127,19 +127,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
this.used = 0;
}
- ///
- public override void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height)
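+ /// <summary>
+ /// Writes the encoded image to the stream.
+ /// </summary>
+ /// <param name="stream">The stream to write to.</param>
+ /// <param name="exifProfile">The exif profile.</param>
+ /// <param name="width">The width of the image.</param>
+ /// <param name="height">The height of the image.</param>
+ /// <param name="hasAlpha">Flag indicating whether an alpha channel is present.</param>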
+ public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, uint width, uint height, bool hasAlpha)
{
- Span buffer = stackalloc byte[4];
bool isVp8X = false;
byte[] exifBytes = null;
uint riffSize = 0;
if (exifProfile != null)
{
isVp8X = true;
- riffSize += WebpConstants.ChunkHeaderSize + WebpConstants.Vp8XChunkSize;
+ riffSize += ExtendedFileChunkSize;
exifBytes = exifProfile.ToByteArray();
- riffSize += WebpConstants.ChunkHeaderSize + (uint)exifBytes.Length;
+ riffSize += this.ExifChunkSize(exifBytes);
}
this.Finish();
@@ -154,15 +160,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
// Write VP8X, header if necessary.
if (isVp8X)
{
- this.WriteVp8XHeader(stream, exifProfile, width, height);
+ this.WriteVp8XHeader(stream, exifProfile, width, height, hasAlpha);
}
// Write magic bytes indicating its a lossless webp.
stream.Write(WebpConstants.Vp8LMagicBytes);
// Write Vp8 Header.
- BinaryPrimitives.WriteUInt32LittleEndian(buffer, size);
- stream.Write(buffer);
+ BinaryPrimitives.WriteUInt32LittleEndian(this.scratchBuffer, size);
+ stream.Write(this.scratchBuffer.AsSpan(0, 4));
stream.WriteByte(WebpConstants.Vp8LHeaderMagicByte);
// Write the encoded bytes of the image to the stream.
diff --git a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs
index 7dbf49d45e..000de4f88c 100644
--- a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs
+++ b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs
@@ -35,6 +35,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
///
/// Gets the number of entropy-analysis passes (in [1..10]).
+ /// Defaults to 1.
///
int EntropyPasses { get; }
diff --git a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
index 70c4efb990..dc546f8ac2 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
@@ -49,6 +49,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
double bitCostBest = -1;
int cacheBitsInitial = cacheBits;
Vp8LHashChain hashChainBox = null;
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int lz77Type = 1; lz77TypesToTry > 0; lz77TypesToTry &= ~lz77Type, lz77Type <<= 1)
{
int cacheBitsTmp = cacheBitsInitial;
@@ -81,7 +83,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Keep the best backward references.
var histo = new Vp8LHistogram(worst, cacheBitsTmp);
- double bitCost = histo.EstimateBits();
+ double bitCost = histo.EstimateBits(stats, bitsEntropy);
if (lz77TypeBest == 0 || bitCost < bitCostBest)
{
@@ -100,7 +102,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox;
BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst);
var histo = new Vp8LHistogram(worst, cacheBits);
- double bitCostTrace = histo.EstimateBits();
+ double bitCostTrace = histo.EstimateBits(stats, bitsEntropy);
if (bitCostTrace < bitCostBest)
{
best = worst;
@@ -214,9 +216,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int i = 0; i <= cacheBitsMax; i++)
{
- double entropy = histos[i].EstimateBits();
+ double entropy = histos[i].EstimateBits(stats, bitsEntropy);
if (i == 0 || entropy < entropyMin)
{
entropyMin = entropy;
diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
index 8596d85558..02bbc38fcf 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
@@ -1,6 +1,8 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
+using System.Runtime.CompilerServices;
+
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
///
@@ -41,6 +43,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Inserts a new color into the cache.
///
/// The color to insert.
+ [MethodImpl(InliningOptions.ShortMethod)]
public void Insert(uint bgra)
{
int key = HashPix(bgra, this.HashShift);
@@ -52,6 +55,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
///
/// The key to lookup.
/// The color for the key.
+ [MethodImpl(InliningOptions.ShortMethod)]
public uint Lookup(int key) => this.Colors[key];
///
@@ -59,6 +63,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
///
/// The color to check.
/// The index of the color in the cache or -1 if its not present.
+ [MethodImpl(InliningOptions.ShortMethod)]
public int Contains(uint bgra)
{
int key = HashPix(bgra, this.HashShift);
@@ -70,6 +75,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
///
/// The color.
/// The index for the color.
+ [MethodImpl(InliningOptions.ShortMethod)]
public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift);
///
@@ -77,8 +83,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
///
/// The key.
/// The color to add.
+ [MethodImpl(InliningOptions.ShortMethod)]
public void Set(uint key, uint bgra) => this.Colors[key] = bgra;
+ [MethodImpl(InliningOptions.ShortMethod)]
public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift);
}
}
diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs
new file mode 100644
index 0000000000..71f3c5ca9e
--- /dev/null
+++ b/src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs
@@ -0,0 +1,268 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
+namespace SixLabors.ImageSharp.Formats.Webp.Lossless
+{
+ internal static class ColorSpaceTransformUtils
+ {
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private static readonly Vector128 CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte(); // keeps byte 1 (green) of every 32-bit pixel
+
+ private static readonly Vector128 CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte(); // keeps the low byte of every 16-bit lane
+
+ private static readonly Vector256 CollectColorRedTransformsGreenMask256 = Vector256.Create(0x00ff00).AsByte(); // 256-bit version of CollectColorRedTransformsGreenMask
+
+ private static readonly Vector256 CollectColorRedTransformsAndMask256 = Vector256.Create((short)0xff).AsByte(); // 256-bit version of CollectColorRedTransformsAndMask
+
+ private static readonly Vector128 CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); // keeps the high byte of every 16-bit lane
+
+ private static readonly Vector128 CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); // keeps bytes 0 and 1 of every 32-bit pixel
+
+ private static readonly Vector128 CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); // keeps the low byte of every 16-bit lane
+
+ private static readonly Vector128 CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255); // byte-shuffle control: byte 2 of pixels 0-3 -> high byte of 16-bit lanes 0-3; index 255 zeroes the destination byte
+
+ private static readonly Vector128 CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14); // byte-shuffle control: byte 2 of pixels 0-3 -> high byte of 16-bit lanes 4-7; the other bytes are zeroed
+
+ private static readonly Vector256 CollectColorBlueTransformsShuffleLowMask256 = Vector256.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30, 255, 255, 255, 255, 255, 255, 255, 255); // ShuffleLowMask pattern for each 128-bit half (the byte shuffle operates per half)
+
+ private static readonly Vector256 CollectColorBlueTransformsShuffleHighMask256 = Vector256.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30); // ShuffleHighMask pattern for each 128-bit half
+
+ private static readonly Vector256 CollectColorBlueTransformsGreenBlueMask256 = Vector256.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); // 256-bit version of CollectColorBlueTransformsGreenBlueMask
+
+ private static readonly Vector256 CollectColorBlueTransformsBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); // 256-bit version of CollectColorBlueTransformsBlueMask
+
+ private static readonly Vector256 CollectColorBlueTransformsGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); // 256-bit version of CollectColorBlueTransformsGreenMask
+#endif
+
+ public static void CollectColorBlueTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo)
+ { // Increments histo[b''] for every pixel of a tileWidth x tileHeight tile, where b'' is the transformed blue value; consecutive rows are 'stride' elements apart in bgra.
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported && tileWidth >= 16) // AVX2 path handles 16 pixels per iteration, so a row needs at least 16 pixels
+ {
+ const int span = 16;
+ Span values = stackalloc ushort[span]; // scratch: the vector result is spilled here so the bins can be updated one element at a time
+ var multsr = Vector256.Create(LosslessUtils.Cst5b(redToBlue)); // red-to-blue multiplier broadcast to every lane
+ var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToBlue)); // green-to-blue multiplier broadcast to every lane
+ for (int y = 0; y < tileHeight; y++)
+ {
+ Span srcSpan = bgra.Slice(y * stride); // row y of the tile
+ ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
+ for (nint x = 0; x <= tileWidth - span; x += span) // whole groups of 16 pixels only; the remainder is handled after the row loop
+ {
+ nint input0Idx = x;
+ nint input1Idx = x + (span / 2); // second half of the group (pixels x+8 .. x+15)
+ Vector256 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
+ Vector256 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
+ Vector256 r0 = Avx2.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask256); // byte 2 of each pixel -> high byte of a 16-bit lane; lanes reserved for input1 are zero
+ Vector256 r1 = Avx2.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask256);
+ Vector256 r = Avx2.Or(r0, r1); // r 0
+ Vector256 gb0 = Avx2.And(input0, CollectColorBlueTransformsGreenBlueMask256); // 0 0 | g b
+ Vector256 gb1 = Avx2.And(input1, CollectColorBlueTransformsGreenBlueMask256);
+ Vector256 gb = Avx2.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); // g b (packed per 128-bit half, matching the layout of r)
+ Vector256 g = Avx2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask256); // g 0
+ Vector256 a = Avx2.MultiplyHigh(r.AsInt16(), multsr); // x dbr
+ Vector256 b = Avx2.MultiplyHigh(g.AsInt16(), multsg); // x dbg
+ Vector256 c = Avx2.Subtract(gb.AsByte(), b.AsByte()); // x b'
+ Vector256 d = Avx2.Subtract(c, a.AsByte()); // x b''
+ Vector256 e = Avx2.And(d, CollectColorBlueTransformsBlueMask256); // 0 b''
+
+ ref ushort outputRef = ref MemoryMarshal.GetReference(values);
+ Unsafe.As>(ref outputRef) = e.AsUInt16(); // store all 16 results into the scratch buffer
+
+ for (int i = 0; i < span; i++) // element order is irrelevant: each value only increments its bin
+ {
+ ++histo[values[i]];
+ }
+ }
+ }
+
+ int leftOver = tileWidth & (span - 1); // trailing columns of each row not covered by the 16-wide groups
+ if (leftOver > 0)
+ {
+ CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo); // start at the first uncovered column; rows are still 'stride' apart
+ }
+ }
+ else if (Sse41.IsSupported) // same computation with 128-bit vectors, 8 pixels per iteration; also considered when the AVX2 condition fails because tileWidth < 16
+ {
+ const int span = 8;
+ Span values = stackalloc ushort[span]; // scratch buffer for the 8 per-pixel results
+ var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
+ var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
+ for (int y = 0; y < tileHeight; y++)
+ {
+ Span srcSpan = bgra.Slice(y * stride); // row y of the tile
+ ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
+ for (nint x = 0; x <= tileWidth - span; x += span) // does not execute at all when tileWidth < 8; the left-over call below then covers the full width
+ {
+ nint input0Idx = x;
+ nint input1Idx = x + (span / 2); // pixels x+4 .. x+7
+ Vector128 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
+ Vector128 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
+ Vector128 r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask); // byte 2 of pixels 0-3 -> high byte of lanes 0-3
+ Vector128 r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask); // byte 2 of pixels 4-7 -> high byte of lanes 4-7
+ Vector128 r = Sse2.Or(r0, r1); // r 0
+ Vector128 gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask); // 0 0 | g b
+ Vector128 gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
+ Vector128 gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); // g b
+ Vector128 g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask); // g 0
+ Vector128 a = Sse2.MultiplyHigh(r.AsInt16(), multsr); // x dbr
+ Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dbg
+ Vector128 c = Sse2.Subtract(gb.AsByte(), b.AsByte()); // x b'
+ Vector128 d = Sse2.Subtract(c, a.AsByte()); // x b''
+ Vector128 e = Sse2.And(d, CollectColorBlueTransformsBlueMask); // 0 b''
+
+ ref ushort outputRef = ref MemoryMarshal.GetReference(values);
+ Unsafe.As>(ref outputRef) = e.AsUInt16(); // store all 8 results into the scratch buffer
+
+ for (int i = 0; i < span; i++)
+ {
+ ++histo[values[i]];
+ }
+ }
+ }
+
+ int leftOver = tileWidth & (span - 1); // trailing columns of each row not covered by the 8-wide groups
+ if (leftOver > 0)
+ {
+ CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
+ }
+ }
+ else
+#endif
+ { // reached when neither SIMD branch applies, or unconditionally when SUPPORTS_RUNTIME_INTRINSICS is not defined
+ CollectColorBlueTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
+ }
+ }
+
+ private static void CollectColorBlueTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo)
+ { // Scalar path: one LosslessUtils.TransformColorBlue call and one histogram increment per pixel of the tile.
+ int pos = 0; // offset of the current row's first pixel within bgra
+ while (tileHeight-- > 0) // one iteration per tile row
+ {
+ for (int x = 0; x < tileWidth; x++)
+ {
+ int idx = LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, bgra[pos + x]); // multipliers are narrowed to sbyte before the transform
+ ++histo[idx]; // the transform result is used directly as the bin index
+ }
+
+ pos += stride; // advance to the next row
+ }
+ }
+
+ public static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo)
+ { // Increments histo[r'] for every pixel of a tileWidth x tileHeight tile, where r' is the transformed red value; consecutive rows are 'stride' elements apart in bgra.
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported && tileWidth >= 16) // AVX2 path handles 16 pixels per iteration, so a row needs at least 16 pixels
+ {
+ var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToRed)); // green-to-red multiplier broadcast to every lane
+ const int span = 16;
+ Span values = stackalloc ushort[span]; // scratch: the vector result is spilled here so the bins can be updated one element at a time
+ for (int y = 0; y < tileHeight; y++)
+ {
+ Span srcSpan = bgra.Slice(y * stride); // row y of the tile
+ ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
+ for (nint x = 0; x <= tileWidth - span; x += span) // whole groups of 16 pixels only; the remainder is handled after the row loop
+ {
+ nint input0Idx = x;
+ nint input1Idx = x + (span / 2); // second half of the group (pixels x+8 .. x+15)
+ Vector256 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
+ Vector256 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
+ Vector256 g0 = Avx2.And(input0, CollectColorRedTransformsGreenMask256); // 0 0 | g 0
+ Vector256 g1 = Avx2.And(input1, CollectColorRedTransformsGreenMask256);
+ Vector256 g = Avx2.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
+ Vector256 a0 = Avx2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
+ Vector256 a1 = Avx2.ShiftRightLogical(input1.AsInt32(), 16);
+ Vector256 a = Avx2.PackUnsignedSaturate(a0, a1); // x r
+ Vector256 b = Avx2.MultiplyHigh(g.AsInt16(), multsg); // x dr
+ Vector256 c = Avx2.Subtract(a.AsByte(), b.AsByte()); // x r'
+ Vector256 d = Avx2.And(c, CollectColorRedTransformsAndMask256); // 0 r'
+
+ ref ushort outputRef = ref MemoryMarshal.GetReference(values);
+ Unsafe.As>(ref outputRef) = d.AsUInt16(); // store all 16 results into the scratch buffer
+
+ for (int i = 0; i < span; i++) // element order is irrelevant: each value only increments its bin
+ {
+ ++histo[values[i]];
+ }
+ }
+ }
+
+ int leftOver = tileWidth & (span - 1); // trailing columns of each row not covered by the 16-wide groups
+ if (leftOver > 0)
+ {
+ CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo); // start at the first uncovered column; rows are still 'stride' apart
+ }
+ }
+ else if (Sse41.IsSupported) // same computation with 128-bit vectors, 8 pixels per iteration; also considered when the AVX2 condition fails because tileWidth < 16
+ {
+ var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
+ const int span = 8;
+ Span values = stackalloc ushort[span]; // scratch buffer for the 8 per-pixel results
+ for (int y = 0; y < tileHeight; y++)
+ {
+ Span srcSpan = bgra.Slice(y * stride); // row y of the tile
+ ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
+ for (nint x = 0; x <= tileWidth - span; x += span) // does not execute at all when tileWidth < 8; the left-over call below then covers the full width
+ {
+ nint input0Idx = x;
+ nint input1Idx = x + (span / 2); // pixels x+4 .. x+7
+ Vector128 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
+ Vector128 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
+ Vector128 g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
+ Vector128 g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
+ Vector128 g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
+ Vector128 a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
+ Vector128 a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
+ Vector128 a = Sse41.PackUnsignedSaturate(a0, a1); // x r
+ Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
+ Vector128 c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
+ Vector128 d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
+
+ ref ushort outputRef = ref MemoryMarshal.GetReference(values);
+ Unsafe.As>(ref outputRef) = d.AsUInt16(); // store all 8 results into the scratch buffer
+
+ for (int i = 0; i < span; i++)
+ {
+ ++histo[values[i]];
+ }
+ }
+ }
+
+ int leftOver = tileWidth & (span - 1); // trailing columns of each row not covered by the 8-wide groups
+ if (leftOver > 0)
+ {
+ CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
+ }
+ }
+ else
+#endif
+ { // reached when neither SIMD branch applies, or unconditionally when SUPPORTS_RUNTIME_INTRINSICS is not defined
+ CollectColorRedTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToRed, histo);
+ }
+ }
+
+ private static void CollectColorRedTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo)
+ { // Scalar path: one LosslessUtils.TransformColorRed call and one histogram increment per pixel of the tile.
+ int pos = 0; // offset of the current row's first pixel within bgra
+ while (tileHeight-- > 0) // one iteration per tile row
+ {
+ for (int x = 0; x < tileWidth; x++)
+ {
+ int idx = LosslessUtils.TransformColorRed((sbyte)greenToRed, bgra[pos + x]); // multiplier is narrowed to sbyte before the transform
+ ++histo[idx]; // the transform result is used directly as the bin index
+ }
+
+ pos += stride; // advance to the next row
+ }
+ }
+ }
+}
diff --git a/src/ImageSharp/Formats/Webp/Lossless/CostModel.cs b/src/ImageSharp/Formats/Webp/Lossless/CostModel.cs
index 7f4d0307bc..bdaf30dc9c 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/CostModel.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/CostModel.cs
@@ -87,7 +87,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (nonzeros <= 1)
{
- output.AsSpan(0, numSymbols).Fill(0);
+ output.AsSpan(0, numSymbols).Clear();
}
else
{
diff --git a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
index f2d4fb189f..b52f8eb5d5 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
@@ -152,10 +152,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static int HistogramCopyAndAnalyze(List origHistograms, List histograms, ushort[] histogramSymbols)
{
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int clusterId = 0, i = 0; i < origHistograms.Count; i++)
{
Vp8LHistogram origHistogram = origHistograms[i];
- origHistogram.UpdateHistogramCost();
+ origHistogram.UpdateHistogramCost(stats, bitsEntropy);
// Skip the histogram if it is completely empty, which can happen for tiles with no information (when they are skipped because of LZ77).
if (!origHistogram.IsUsed[0] && !origHistogram.IsUsed[1] && !origHistogram.IsUsed[2] && !origHistogram.IsUsed[3] && !origHistogram.IsUsed[4])
@@ -175,7 +177,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return numUsed;
}
- private static void HistogramCombineEntropyBin(List histograms, ushort[] clusters, ushort[] clusterMappings, Vp8LHistogram curCombo, ushort[] binMap, int numBins, double combineCostFactor)
+ private static void HistogramCombineEntropyBin(
+ List histograms,
+ ushort[] clusters,
+ ushort[] clusterMappings,
+ Vp8LHistogram curCombo,
+ ushort[] binMap,
+ int numBins,
+ double combineCostFactor)
{
var binInfo = new HistogramBinInfo[BinSize];
for (int idx = 0; idx < numBins; idx++)
@@ -191,6 +200,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
var indicesToRemove = new List();
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int idx = 0; idx < histograms.Count; idx++)
{
if (histograms[idx] == null)
@@ -209,7 +220,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Try to merge #idx into #first (both share the same binId)
double bitCost = histograms[idx].BitCost;
double bitCostThresh = -bitCost * combineCostFactor;
- double currCostDiff = histograms[first].AddEval(histograms[idx], bitCostThresh, curCombo);
+ double currCostDiff = histograms[first].AddEval(histograms[idx], stats, bitsEntropy, bitCostThresh, curCombo);
if (currCostDiff < bitCostThresh)
{
@@ -276,7 +287,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Create a mapping from a cluster id to its minimal version.
int clusterMax = 0;
- clusterMappingsTmp.AsSpan().Fill(0);
+ clusterMappingsTmp.AsSpan().Clear();
// Re-map the ids.
for (int i = 0; i < symbols.Length; i++)
@@ -308,6 +319,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int numUsed = histograms.Count(h => h != null);
int outerIters = numUsed;
int numTriesNoSuccess = outerIters / 2;
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
if (numUsed < minClusterSize)
{
@@ -354,7 +367,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
idx2 = mappings[idx2];
// Calculate cost reduction on combination.
- double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost);
+ double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost, stats, bitsEntropy);
// Found a better pair?
if (currCost < 0)
@@ -428,7 +441,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (doEval)
{
// Re-evaluate the cost of an updated pair.
- HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], 0.0d, p);
+ HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], stats, bitsEntropy, 0.0d, p);
if (p.CostDiff >= 0.0d)
{
histoPriorityList[j] = histoPriorityList[histoPriorityList.Count - 1];
@@ -456,6 +469,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Priority list of histogram pairs.
var histoPriorityList = new List();
int maxSize = histoSize * histoSize;
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int i = 0; i < histoSize; i++)
{
@@ -471,7 +486,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
continue;
}
- HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d);
+ HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d, stats, bitsEntropy);
}
}
@@ -510,7 +525,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
continue;
}
- HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d);
+ HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d, stats, bitsEntropy);
}
}
}
@@ -519,6 +534,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
int inSize = input.Count;
int outSize = output.Count;
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
if (outSize > 1)
{
for (int i = 0; i < inSize; i++)
@@ -534,7 +551,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
double bestBits = double.MaxValue;
for (int k = 0; k < outSize; k++)
{
- double curBits = output[k].AddThresh(input[i], bestBits);
+ double curBits = output[k].AddThresh(input[i], stats, bitsEntropy, bestBits);
if (k == 0 || curBits < bestBits)
{
bestBits = curBits;
@@ -577,7 +594,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Create a pair from indices "idx1" and "idx2" provided its cost is inferior to "threshold", a negative entropy.
///
/// The cost of the pair, or 0 if it superior to threshold.
- private static double HistoPriorityListPush(List histoList, int maxSize, List histograms, int idx1, int idx2, double threshold)
+ private static double HistoPriorityListPush(List histoList, int maxSize, List histograms, int idx1, int idx2, double threshold, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{
var pair = new HistogramPair();
@@ -598,7 +615,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Vp8LHistogram h1 = histograms[idx1];
Vp8LHistogram h2 = histograms[idx2];
- HistoListUpdatePair(h1, h2, threshold, pair);
+ HistoListUpdatePair(h1, h2, stats, bitsEntropy, threshold, pair);
// Do not even consider the pair if it does not improve the entropy.
if (pair.CostDiff >= threshold)
@@ -616,11 +633,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
///
/// Update the cost diff and combo of a pair of histograms. This needs to be called when the the histograms have been merged with a third one.
///
- private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, double threshold, HistogramPair pair)
+ private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double threshold, HistogramPair pair)
{
double sumCost = h1.BitCost + h2.BitCost;
pair.CostCombo = 0.0d;
- h1.GetCombinedHistogramEntropy(h2, sumCost + threshold, costInitial: pair.CostCombo, out double cost);
+ h1.GetCombinedHistogramEntropy(h2, stats, bitsEntropy, sumCost + threshold, costInitial: pair.CostCombo, out double cost);
pair.CostCombo = cost;
pair.CostDiff = pair.CostCombo - sumCost;
}
diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
index cd8be9aac3..0376311ed9 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
@@ -49,14 +49,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
return -1;
}
- else if (t1.TotalCount < t2.TotalCount)
+
+ if (t1.TotalCount < t2.TotalCount)
{
return 1;
}
- else
- {
- return t1.Value < t2.Value ? -1 : 1;
- }
+
+ return t1.Value < t2.Value ? -1 : 1;
}
public IDeepCloneable DeepClone() => new HuffmanTree(this);
diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
index f2321d6813..5db01ca1c7 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
@@ -28,7 +28,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static void CreateHuffmanTree(uint[] histogram, int treeDepthLimit, bool[] bufRle, HuffmanTree[] huffTree, HuffmanTreeCode huffCode)
{
int numSymbols = huffCode.NumSymbols;
- bufRle.AsSpan().Fill(false);
+ bufRle.AsSpan().Clear();
OptimizeHuffmanForRle(numSymbols, bufRle, histogram);
GenerateOptimalTree(huffTree, histogram, numSymbols, treeDepthLimit, huffCode.CodeLengths);
@@ -202,9 +202,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
// Build the Huffman tree.
- HuffmanTree[] treeCopy = tree.AsSpan().Slice(0, treeSize).ToArray();
+#if NET5_0_OR_GREATER
+ Span treeSlice = tree.AsSpan(0, treeSize);
+ treeSlice.Sort(HuffmanTree.Compare);
+#else
+ HuffmanTree[] treeCopy = tree.AsSpan(0, treeSize).ToArray();
Array.Sort(treeCopy, HuffmanTree.Compare);
treeCopy.AsSpan().CopyTo(tree);
+#endif
if (treeSize > 1)
{
diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
index b7f94415be..471c083cda 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
@@ -27,6 +27,36 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const double Log2Reciprocal = 1.44269504088896338700465094007086;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private static readonly Vector256 AddGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); // byte-shuffle control: copies byte 1 of each pixel into bytes 0 and 2 and zeroes bytes 1 and 3 ("0g0g")
+
+ private static readonly Vector128 AddGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); // 128-bit version of AddGreenToBlueAndRedMaskAvx2
+
+ private static readonly byte AddGreenToBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); // 16-bit lane selector passed to ShuffleLow/ShuffleHigh to turn "0 a 0 g" into "0g0g"
+
+ private static readonly Vector256 SubtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); // same values as AddGreenToBlueAndRedMaskAvx2
+
+ private static readonly Vector128 SubtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); // same values as AddGreenToBlueAndRedMaskSsse3
+
+ private static readonly byte SubtractGreenFromBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); // same selector as AddGreenToBlueAndRedShuffleMask
+
+ private static readonly Vector128 TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); // keeps the high byte of every 16-bit lane
+
+ private static readonly Vector256 TransformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); // 256-bit version of TransformColorAlphaGreenMask
+
+ private static readonly Vector128 TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); // keeps the low byte of every 16-bit lane
+
+ private static readonly Vector256 TransformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); // 256-bit version of TransformColorRedBlueMask
+
+ private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); // 16-bit lane selector passed to ShuffleLow/ShuffleHigh in TransformColor
+
+ private static readonly Vector128 TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); // same values as TransformColorAlphaGreenMask
+
+ private static readonly Vector256 TransformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); // same values as TransformColorAlphaGreenMask256
+
+ private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); // same selector as TransformColorShuffleMask
+#endif
+
///
/// Returns the exact index where array1 and array2 are different. For an index
/// inferior or equal to bestLenMatch, the return value just has to be strictly
@@ -97,80 +127,68 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
- var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length;
- fixed (uint* p = pixelData)
+ nint i;
+ for (i = 0; i <= numPixels - 8; i += 8)
{
- int i;
- for (i = 0; i + 8 <= numPixels; i += 8)
- {
- uint* idx = p + i;
- Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte();
- Vector256 in0g0g = Avx2.Shuffle(input, mask);
- Vector256 output = Avx2.Add(input, in0g0g);
- Avx.Store((byte*)idx, output);
- }
+ ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
+ Vector256 input = Unsafe.As>(ref pos).AsByte();
+ Vector256 in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2);
+ Vector256 output = Avx2.Add(input, in0g0g);
+ Unsafe.As>(ref pos) = output.AsUInt32();
+ }
- if (i != numPixels)
- {
- AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i));
- }
+ if (i != numPixels)
+ {
+ AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Ssse3.IsSupported)
{
- var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length;
- fixed (uint* p = pixelData)
+ nint i;
+ for (i = 0; i <= numPixels - 4; i += 4)
{
- int i;
- for (i = 0; i + 4 <= numPixels; i += 4)
- {
- uint* idx = p + i;
- Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte();
- Vector128 in0g0g = Ssse3.Shuffle(input, mask);
- Vector128 output = Sse2.Add(input, in0g0g);
- Sse2.Store((byte*)idx, output.AsByte());
- }
+ ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
+ Vector128 input = Unsafe.As>(ref pos).AsByte();
+ Vector128 in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3);
+ Vector128 output = Sse2.Add(input, in0g0g);
+ Unsafe.As>(ref pos) = output.AsUInt32();
+ }
- if (i != numPixels)
- {
- AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i));
- }
+ if (i != numPixels)
+ {
+ AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Sse2.IsSupported)
{
- byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length;
- fixed (uint* p = pixelData)
+ nint i;
+ for (i = 0; i <= numPixels - 4; i += 4)
{
- int i;
- for (i = 0; i + 4 <= numPixels; i += 4)
- {
- uint* idx = p + i;
- Vector128 input = Sse2.LoadVector128((ushort*)idx);
- Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
- Vector128 b = Sse2.ShuffleLow(a, mask);
- Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g
- Vector128 output = Sse2.Add(input.AsByte(), c.AsByte());
- Sse2.Store((byte*)idx, output);
- }
+ ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
+ Vector128 input = Unsafe.As>(ref pos).AsByte();
+ Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
+ Vector128 b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g
+ Vector128 output = Sse2.Add(input.AsByte(), c.AsByte());
+ Unsafe.As>(ref pos) = output.AsUInt32();
+ }
- if (i != numPixels)
- {
- AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i));
- }
+ if (i != numPixels)
+ {
+ AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else
#endif
{
- AddGreenToBlueAndRedNoneVectorized(pixelData);
+ AddGreenToBlueAndRedScalar(pixelData);
}
}
- private static void AddGreenToBlueAndRedNoneVectorized(Span pixelData)
+ private static void AddGreenToBlueAndRedScalar(Span pixelData)
{
int numPixels = pixelData.Length;
for (int i = 0; i < numPixels; i++)
@@ -189,80 +207,68 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
- var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length;
- fixed (uint* p = pixelData)
+ nint i;
+ for (i = 0; i <= numPixels - 8; i += 8)
{
- int i;
- for (i = 0; i + 8 <= numPixels; i += 8)
- {
- uint* idx = p + i;
- Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte();
- Vector256 in0g0g = Avx2.Shuffle(input, mask);
- Vector256 output = Avx2.Subtract(input, in0g0g);
- Avx.Store((byte*)idx, output);
- }
+ ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
+ Vector256 input = Unsafe.As>(ref pos).AsByte();
+ Vector256 in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2);
+ Vector256 output = Avx2.Subtract(input, in0g0g);
+ Unsafe.As>(ref pos) = output.AsUInt32();
+ }
- if (i != numPixels)
- {
- SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i));
- }
+ if (i != numPixels)
+ {
+ SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Ssse3.IsSupported)
{
- var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length;
- fixed (uint* p = pixelData)
+ nint i;
+ for (i = 0; i <= numPixels - 4; i += 4)
{
- int i;
- for (i = 0; i + 4 <= numPixels; i += 4)
- {
- uint* idx = p + i;
- Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte();
- Vector128 in0g0g = Ssse3.Shuffle(input, mask);
- Vector128 output = Sse2.Subtract(input, in0g0g);
- Sse2.Store((byte*)idx, output.AsByte());
- }
+ ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
+ Vector128 input = Unsafe.As>(ref pos).AsByte();
+ Vector128 in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3);
+ Vector128 output = Sse2.Subtract(input, in0g0g);
+ Unsafe.As>(ref pos) = output.AsUInt32();
+ }
- if (i != numPixels)
- {
- SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i));
- }
+ if (i != numPixels)
+ {
+ SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Sse2.IsSupported)
{
- byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length;
- fixed (uint* p = pixelData)
+ nint i;
+ for (i = 0; i <= numPixels - 4; i += 4)
{
- int i;
- for (i = 0; i + 4 <= numPixels; i += 4)
- {
- uint* idx = p + i;
- Vector128 input = Sse2.LoadVector128((ushort*)idx);
- Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
- Vector128 b = Sse2.ShuffleLow(a, mask);
- Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g
- Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte());
- Sse2.Store((byte*)idx, output);
- }
+ ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
+ Vector128 input = Unsafe.As>(ref pos).AsByte();
+ Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
+ Vector128 b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g
+ Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte());
+ Unsafe.As>(ref pos) = output.AsUInt32();
+ }
- if (i != numPixels)
- {
- SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i));
- }
+ if (i != numPixels)
+ {
+ SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else
#endif
{
- SubtractGreenFromBlueAndRedNoneVectorized(pixelData);
+ SubtractGreenFromBlueAndRedScalar(pixelData);
}
}
- private static void SubtractGreenFromBlueAndRedNoneVectorized(Span pixelData)
+ private static void SubtractGreenFromBlueAndRedScalar(Span pixelData)
{
int numPixels = pixelData.Length;
for (int i = 0; i < numPixels; i++)
@@ -385,52 +391,74 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Color transform keeps the green (G) value as it is, transforms red (R) based on green and transforms blue (B) based on green and then based on red.
///
/// The Vp8LMultipliers.
- /// The pixel data to transform.
+ /// The pixel data to transform.
/// The number of pixels to process.
- public static void TransformColor(Vp8LMultipliers m, Span data, int numPixels)
+ public static void TransformColor(Vp8LMultipliers m, Span pixelData, int numPixels)
{
#if SUPPORTS_RUNTIME_INTRINSICS
- if (Sse2.IsSupported)
+ if (Avx2.IsSupported && numPixels >= 8)
+ {
+ Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
+ Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0);
+
+ nint idx;
+ for (idx = 0; idx <= numPixels - 8; idx += 8)
+ {
+ ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
+ Vector256 input = Unsafe.As>(ref pos);
+ Vector256 a = Avx2.And(input.AsByte(), TransformColorAlphaGreenMask256);
+ Vector256 b = Avx2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
+ Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
+ Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
+ Vector256 e = Avx2.ShiftLeftLogical(input.AsInt16(), 8);
+ Vector256 f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
+ Vector256 g = Avx2.ShiftRightLogical(f.AsInt32(), 16);
+ Vector256 h = Avx2.Add(g.AsByte(), d.AsByte());
+ Vector256 i = Avx2.And(h, TransformColorRedBlueMask256);
+ Vector256 output = Avx2.Subtract(input.AsByte(), i);
+ Unsafe.As>(ref pos) = output.AsUInt32();
+ }
+
+ if (idx != numPixels)
+ {
+ TransformColorScalar(m, pixelData.Slice((int)idx), numPixels - (int)idx);
+ }
+ }
+ else if (Sse2.IsSupported)
{
Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
- var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
- var maskredblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
- byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
- fixed (uint* src = data)
+ nint idx;
+ for (idx = 0; idx <= numPixels - 4; idx += 4)
{
- int idx;
- for (idx = 0; idx + 4 <= numPixels; idx += 4)
- {
- uint* pos = src + idx;
- Vector128 input = Sse2.LoadVector128(pos);
- Vector128 a = Sse2.And(input.AsByte(), maskalphagreen);
- Vector128 b = Sse2.ShuffleLow(a.AsInt16(), shufflemask);
- Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask);
- Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
- Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8);
- Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
- Vector128 g = Sse2.ShiftRightLogical(f.AsInt32(), 16);
- Vector128 h = Sse2.Add(g.AsByte(), d.AsByte());
- Vector128 i = Sse2.And(h, maskredblue);
- Vector128 output = Sse2.Subtract(input.AsByte(), i);
- Sse2.Store((byte*)pos, output);
- }
+ ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
+ Vector128 input = Unsafe.As>(ref pos);
+ Vector128 a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask);
+ Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
+ Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
+ Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8);
+ Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
+ Vector128 g = Sse2.ShiftRightLogical(f.AsInt32(), 16);
+ Vector128 h = Sse2.Add(g.AsByte(), d.AsByte());
+ Vector128 i = Sse2.And(h, TransformColorRedBlueMask);
+ Vector128 output = Sse2.Subtract(input.AsByte(), i);
+ Unsafe.As>(ref pos) = output.AsUInt32();
+ }
- if (idx != numPixels)
- {
- TransformColorNoneVectorized(m, data.Slice(idx), numPixels - idx);
- }
+ if (idx != numPixels)
+ {
+ TransformColorScalar(m, pixelData.Slice((int)idx), numPixels - (int)idx);
}
}
else
#endif
{
- TransformColorNoneVectorized(m, data, numPixels);
+ TransformColorScalar(m, pixelData, numPixels);
}
}
- private static void TransformColorNoneVectorized(Vp8LMultipliers m, Span data, int numPixels)
+ private static void TransformColorScalar(Vp8LMultipliers m, Span data, int numPixels)
{
for (int i = 0; i < numPixels; i++)
{
@@ -456,47 +484,71 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static void TransformColorInverse(Vp8LMultipliers m, Span pixelData)
{
#if SUPPORTS_RUNTIME_INTRINSICS
- if (Sse2.IsSupported)
+ if (Avx2.IsSupported && pixelData.Length >= 8)
+ {
+ Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
+ Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0);
+ nint idx;
+ for (idx = 0; idx <= pixelData.Length - 8; idx += 8)
+ {
+ ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
+ Vector256 input = Unsafe.As>(ref pos);
+ Vector256 a = Avx2.And(input.AsByte(), TransformColorInverseAlphaGreenMask256);
+ Vector256 b = Avx2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
+ Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
+ Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
+ Vector256 e = Avx2.Add(input.AsByte(), d.AsByte());
+ Vector256 f = Avx2.ShiftLeftLogical(e.AsInt16(), 8);
+ Vector256 g = Avx2.MultiplyHigh(f, multsb2.AsInt16());
+ Vector256 h = Avx2.ShiftRightLogical(g.AsInt32(), 8);
+ Vector256 i = Avx2.Add(h.AsByte(), f.AsByte());
+ Vector256 j = Avx2.ShiftRightLogical(i.AsInt16(), 8);
+ Vector256 output = Avx2.Or(j.AsByte(), a);
+ Unsafe.As>(ref pos) = output.AsUInt32();
+ }
+
+ if (idx != pixelData.Length)
+ {
+ TransformColorInverseScalar(m, pixelData.Slice((int)idx));
+ }
+ }
+ else if (Sse2.IsSupported)
{
Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
- var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
- byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
- fixed (uint* src = pixelData)
+
+ nint idx;
+ for (idx = 0; idx <= pixelData.Length - 4; idx += 4)
{
- int idx;
- for (idx = 0; idx + 4 <= pixelData.Length; idx += 4)
- {
- uint* pos = src + idx;
- Vector128 input = Sse2.LoadVector128(pos);
- Vector128 a = Sse2.And(input.AsByte(), maskalphagreen);
- Vector128 b = Sse2.ShuffleLow(a.AsInt16(), shufflemask);
- Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask);
- Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
- Vector128 e = Sse2.Add(input.AsByte(), d.AsByte());
- Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8);
- Vector128 g = Sse2.MultiplyHigh(f, multsb2.AsInt16());
- Vector128 h = Sse2.ShiftRightLogical(g.AsInt32(), 8);
- Vector128 i = Sse2.Add(h.AsByte(), f.AsByte());
- Vector128 j = Sse2.ShiftRightLogical(i.AsInt16(), 8);
- Vector128 output = Sse2.Or(j.AsByte(), a);
- Sse2.Store((byte*)pos, output);
- }
+ ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
+ Vector128 input = Unsafe.As>(ref pos);
+ Vector128 a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask);
+ Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
+ Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
+ Vector128 e = Sse2.Add(input.AsByte(), d.AsByte());
+ Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8);
+ Vector128 g = Sse2.MultiplyHigh(f, multsb2.AsInt16());
+ Vector128 h = Sse2.ShiftRightLogical(g.AsInt32(), 8);
+ Vector128 i = Sse2.Add(h.AsByte(), f.AsByte());
+ Vector128 j = Sse2.ShiftRightLogical(i.AsInt16(), 8);
+ Vector128 output = Sse2.Or(j.AsByte(), a);
+ Unsafe.As>(ref pos) = output.AsUInt32();
+ }
- if (idx != pixelData.Length)
- {
- TransformColorInverseNoneVectorized(m, pixelData.Slice(idx));
- }
+ if (idx != pixelData.Length)
+ {
+ TransformColorInverseScalar(m, pixelData.Slice((int)idx));
}
}
else
#endif
{
- TransformColorInverseNoneVectorized(m, pixelData);
+ TransformColorInverseScalar(m, pixelData);
}
}
- private static void TransformColorInverseNoneVectorized(Vp8LMultipliers m, Span pixelData)
+ private static void TransformColorInverseScalar(Vp8LMultipliers m, Span pixelData)
{
for (int i = 0; i < pixelData.Length; i++)
{
@@ -551,6 +603,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int mask = tileWidth - 1;
int tilesPerRow = SubSampleSize(width, transform.Bits);
int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow;
+ Span scratch = stackalloc short[8];
while (y < yEnd)
{
int predictorModeIdx = predictorModeIdxBase;
@@ -608,7 +661,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
PredictorAdd10(input + x, output + x - width, xEnd - x, output + x);
break;
case 11:
- PredictorAdd11(input + x, output + x - width, xEnd - x, output + x);
+ PredictorAdd11(input + x, output + x - width, xEnd - x, output + x, scratch);
break;
case 12:
PredictorAdd12(input + x, output + x - width, xEnd - x, output + x);
@@ -704,7 +757,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Compute the combined Shanon's entropy for distribution {X} and {X+Y}.
///
/// Shanon entropy.
- public static float CombinedShannonEntropy(int[] x, int[] y)
+ public static float CombinedShannonEntropy(Span x, Span y)
{
double retVal = 0.0d;
uint sumX = 0, sumXY = 0;
@@ -730,6 +783,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (float)retVal;
}
+ [MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorRed(sbyte greenToRed, uint argb)
{
sbyte green = U32ToS8(argb >> 8);
@@ -738,6 +792,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (byte)(newRed & 0xff);
}
+ [MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint argb)
{
sbyte green = U32ToS8(argb >> 8);
@@ -751,6 +806,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
///
/// Fast calculation of log2(v) for integer input.
///
+ [MethodImpl(InliningOptions.ShortMethod)]
public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v);
///
@@ -779,7 +835,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static float FastSLog2Slow(uint v)
{
- Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
+ DebugGuard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
if (v < ApproxLogWithCorrectionMax)
{
int logCnt = 0;
@@ -803,15 +859,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int correction = (int)((23 * (origV & (y - 1))) >> 4);
return (vF * (WebpLookupTables.Log2Table[v] + logCnt)) + correction;
}
- else
- {
- return (float)(Log2Reciprocal * v * Math.Log(v));
- }
+
+ return (float)(Log2Reciprocal * v * Math.Log(v));
}
private static float FastLog2Slow(uint v)
{
Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
+
if (v < ApproxLogWithCorrectionMax)
{
int logCnt = 0;
@@ -974,11 +1029,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
[MethodImpl(InliningOptions.ShortMethod)]
- private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output)
+ private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output, Span scratch)
{
for (int x = 0; x < numberOfPixels; x++)
{
- uint pred = Predictor11(output[x - 1], upper + x);
+ uint pred = Predictor11(output[x - 1], upper + x, scratch);
output[x] = AddPixels(input[x], pred);
}
}
@@ -1031,7 +1086,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]);
[MethodImpl(InliningOptions.ShortMethod)]
- public static uint Predictor11(uint left, uint* top) => Select(top[0], left, top[-1]);
+ public static uint Predictor11(uint left, uint* top, Span scratch) => Select(top[0], left, top[-1], scratch);
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]);
@@ -1148,11 +1203,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
[MethodImpl(InliningOptions.ShortMethod)]
- public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output)
+ public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output, Span scratch)
{
for (int x = 0; x < numPixels; x++)
{
- uint pred = Predictor11(input[x - 1], upper + x);
+ uint pred = Predictor11(input[x - 1], upper + x, scratch);
output[x] = SubPixels(input[x], pred);
}
}
@@ -1200,30 +1255,65 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2)
{
- int a = AddSubtractComponentFull(
- (int)(c0 >> 24),
- (int)(c1 >> 24),
- (int)(c2 >> 24));
- int r = AddSubtractComponentFull(
- (int)((c0 >> 16) & 0xff),
- (int)((c1 >> 16) & 0xff),
- (int)((c2 >> 16) & 0xff));
- int g = AddSubtractComponentFull(
- (int)((c0 >> 8) & 0xff),
- (int)((c1 >> 8) & 0xff),
- (int)((c2 >> 8) & 0xff));
- int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
- return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported)
+ {
+ Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero);
+ Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero);
+ Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero);
+ Vector128 v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16());
+ Vector128 v2 = Sse2.Subtract(v1, c2Vec.AsInt16());
+ Vector128 b = Sse2.PackUnsignedSaturate(v2, v2);
+ uint output = Sse2.ConvertToUInt32(b.AsUInt32());
+ return output;
+ }
+#endif
+ {
+ int a = AddSubtractComponentFull(
+ (int)(c0 >> 24),
+ (int)(c1 >> 24),
+ (int)(c2 >> 24));
+ int r = AddSubtractComponentFull(
+ (int)((c0 >> 16) & 0xff),
+ (int)((c1 >> 16) & 0xff),
+ (int)((c2 >> 16) & 0xff));
+ int g = AddSubtractComponentFull(
+ (int)((c0 >> 8) & 0xff),
+ (int)((c1 >> 8) & 0xff),
+ (int)((c2 >> 8) & 0xff));
+ int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
+ return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+ }
}
private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
{
- uint ave = Average2(c0, c1);
- int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
- int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
- int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
- int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
- return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported)
+ {
+ Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero);
+ Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero);
+ Vector128 b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero);
+ Vector128 avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16());
+ Vector128 a0 = Sse2.ShiftRightLogical(avg, 1);
+ Vector128 a1 = Sse2.Subtract(a0, b0.AsInt16());
+ Vector128 bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16());
+ Vector128 a2 = Sse2.Subtract(a1, bgta);
+ Vector128 a3 = Sse2.ShiftRightArithmetic(a2, 1);
+ Vector128 a4 = Sse2.Add(a0, a3).AsInt16();
+ Vector128 a5 = Sse2.PackUnsignedSaturate(a4, a4);
+ uint output = Sse2.ConvertToUInt32(a5.AsUInt32());
+ return output;
+ }
+#endif
+ {
+ uint ave = Average2(c0, c1);
+ int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
+ int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
+ int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
+ int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
+ return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+ }
}
[MethodImpl(InliningOptions.ShortMethod)]
@@ -1238,16 +1328,46 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector128 MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff));
+
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private static Vector256 MkCst32(int hi, int lo) => Vector256.Create((hi << 16) | (lo & 0xffff));
#endif
- private static uint Select(uint a, uint b, uint c)
+ private static uint Select(uint a, uint b, uint c, Span scratch)
{
- int paMinusPb =
- Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
- Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
- Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
- Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
- return paMinusPb <= 0 ? a : b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported)
+ {
+ Span output = scratch;
+ fixed (short* p = output)
+ {
+ Vector128 a0 = Sse2.ConvertScalarToVector128UInt32(a).AsByte();
+ Vector128 b0 = Sse2.ConvertScalarToVector128UInt32(b).AsByte();
+ Vector128 c0 = Sse2.ConvertScalarToVector128UInt32(c).AsByte();
+ Vector128 ac0 = Sse2.SubtractSaturate(a0, c0);
+ Vector128 ca0 = Sse2.SubtractSaturate(c0, a0);
+ Vector128 bc0 = Sse2.SubtractSaturate(b0, c0);
+ Vector128 cb0 = Sse2.SubtractSaturate(c0, b0);
+ Vector128 ac = Sse2.Or(ac0, ca0);
+ Vector128 bc = Sse2.Or(bc0, cb0);
+ Vector128 pa = Sse2.UnpackLow(ac, Vector128.Zero); // |a - c|
+ Vector128 pb = Sse2.UnpackLow(bc, Vector128.Zero); // |b - c|
+ Vector128 diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16());
+ Sse2.Store((ushort*)p, diff);
+ int paMinusPb = output[3] + output[2] + output[1] + output[0];
+ return (paMinusPb <= 0) ? a : b;
+ }
+ }
+ else
+#endif
+ {
+ int paMinusPb =
+ Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
+ Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
+ Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
+ Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
+ return paMinusPb <= 0 ? a : b;
+ }
}
[MethodImpl(InliningOptions.ShortMethod)]
diff --git a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
index 2d71a7af64..6cd109121d 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
@@ -15,7 +15,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public uint BgraOrDistance { get; set; }
public static PixOrCopy CreateCacheIdx(int idx) =>
- new PixOrCopy()
+ new()
{
Mode = PixOrCopyMode.CacheIdx,
BgraOrDistance = (uint)idx,
@@ -23,14 +23,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
};
public static PixOrCopy CreateLiteral(uint bgra) =>
- new PixOrCopy()
+ new()
{
Mode = PixOrCopyMode.Literal,
BgraOrDistance = bgra,
Len = 1
};
- public static PixOrCopy CreateCopy(uint distance, ushort len) => new PixOrCopy()
+ public static PixOrCopy CreateCopy(uint distance, ushort len) => new()
{
Mode = PixOrCopyMode.Copy,
BgraOrDistance = distance,
diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
index 671e9a043e..a1e04c66a5 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
@@ -5,11 +5,6 @@ using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
-#if SUPPORTS_RUNTIME_INTRINSICS
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
-#endif
-
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
///