From 257ff1929e341e5b1af94d9adf557e5296ece957 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 29 Oct 2021 23:32:13 +1100 Subject: [PATCH 01/36] Use RgbaVector for color backing --- src/ImageSharp/Color/Color.Conversions.cs | 87 ++++++++++++++++--- src/ImageSharp/Color/Color.cs | 74 ++++++++-------- .../Color/ColorTests.CastFrom.cs | 17 +++- .../Color/ColorTests.ConstructFrom.cs | 4 +- 4 files changed, 125 insertions(+), 57 deletions(-) diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs index 0455fd26a..abcb54b80 100644 --- a/src/ImageSharp/Color/Color.Conversions.cs +++ b/src/ImageSharp/Color/Color.Conversions.cs @@ -17,56 +17,90 @@ namespace SixLabors.ImageSharp /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba64 pixel) => this.data = pixel; + public Color(Rgba64 pixel) + { + RgbaVector vector = default; + vector.FromRgba64(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba32 pixel) => this.data = new Rgba64(pixel); + public Color(Rgba32 pixel) + { + RgbaVector vector = default; + vector.FromRgba32(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Argb32 pixel) => this.data = new Rgba64(pixel); + public Color(Argb32 pixel) + { + RgbaVector vector = default; + vector.FromArgb32(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgra32 pixel) => this.data = new Rgba64(pixel); + public Color(Bgra32 pixel) + { + RgbaVector vector = default; + vector.FromBgra32(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. 
/// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgb24 pixel) => this.data = new Rgba64(pixel); + public Color(Rgb24 pixel) + { + RgbaVector vector = default; + vector.FromRgb24(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgr24 pixel) => this.data = new Rgba64(pixel); + public Color(Bgr24 pixel) + { + RgbaVector vector = default; + vector.FromBgr24(pixel); + this.data = vector; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Vector4 vector) => this.data = new Rgba64(vector); + public Color(Vector4 vector) + { + vector = Numerics.Clamp(vector, Vector4.Zero, Vector4.One); + this.data = new RgbaVector(vector.X, vector.Y, vector.Z, vector.W); + } /// /// Converts a to . /// /// The . /// The . - public static explicit operator Vector4(Color color) => color.data.ToVector4(); + public static explicit operator Vector4(Color color) => color.data.ToScaledVector4(); /// /// Converts an to . @@ -74,22 +108,47 @@ namespace SixLabors.ImageSharp /// The . /// The . 
[MethodImpl(InliningOptions.ShortMethod)] - public static explicit operator Color(Vector4 source) => new Color(source); + public static explicit operator Color(Vector4 source) => new(source); [MethodImpl(InliningOptions.ShortMethod)] - internal Rgba32 ToRgba32() => this.data.ToRgba32(); + internal Rgba32 ToRgba32() + { + Rgba32 result = default; + result.FromScaledVector4(this.data.ToScaledVector4()); + return result; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Bgra32 ToBgra32() => this.data.ToBgra32(); + internal Bgra32 ToBgra32() + { + Bgra32 result = default; + result.FromScaledVector4(this.data.ToScaledVector4()); + return result; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Argb32 ToArgb32() => this.data.ToArgb32(); + internal Argb32 ToArgb32() + { + Argb32 result = default; + result.FromScaledVector4(this.data.ToScaledVector4()); + return result; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Rgb24 ToRgb24() => this.data.ToRgb24(); + internal Rgb24 ToRgb24() + { + Rgb24 result = default; + result.FromScaledVector4(this.data.ToScaledVector4()); + return result; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Bgr24 ToBgr24() => this.data.ToBgr24(); + internal Bgr24 ToBgr24() + { + Bgr24 result = default; + result.FromScaledVector4(this.data.ToScaledVector4()); + return result; + } [MethodImpl(InliningOptions.ShortMethod)] internal Vector4 ToVector4() => this.data.ToVector4(); diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index d5eedc160..9a4df4e62 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -20,26 +20,22 @@ namespace SixLabors.ImageSharp /// public readonly partial struct Color : IEquatable { - private readonly Rgba64 data; + private readonly RgbaVector data; [MethodImpl(InliningOptions.ShortMethod)] private Color(byte r, byte g, byte b, byte a) { - this.data = new Rgba64( - ColorNumerics.UpscaleFrom8BitTo16Bit(r), - 
ColorNumerics.UpscaleFrom8BitTo16Bit(g), - ColorNumerics.UpscaleFrom8BitTo16Bit(b), - ColorNumerics.UpscaleFrom8BitTo16Bit(a)); + RgbaVector vector = default; + vector.FromRgba32(new(r, g, b, a)); + this.data = vector; } [MethodImpl(InliningOptions.ShortMethod)] private Color(byte r, byte g, byte b) { - this.data = new Rgba64( - ColorNumerics.UpscaleFrom8BitTo16Bit(r), - ColorNumerics.UpscaleFrom8BitTo16Bit(g), - ColorNumerics.UpscaleFrom8BitTo16Bit(b), - ushort.MaxValue); + RgbaVector vector = default; + vector.FromRgba32(new(r, g, b)); + this.data = vector; } /// @@ -52,10 +48,7 @@ namespace SixLabors.ImageSharp /// otherwise, false. /// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator ==(Color left, Color right) - { - return left.Equals(right); - } + public static bool operator ==(Color left, Color right) => left.Equals(right); /// /// Checks whether two structures are equal. @@ -67,10 +60,7 @@ namespace SixLabors.ImageSharp /// otherwise, false. /// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator !=(Color left, Color right) - { - return !left.Equals(right); - } + public static bool operator !=(Color left, Color right) => !left.Equals(right); /// /// Creates a from RGBA bytes. @@ -81,7 +71,7 @@ namespace SixLabors.ImageSharp /// The alpha component (0-255). /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgba(byte r, byte g, byte b, byte a) => new Color(r, g, b, a); + public static Color FromRgba(byte r, byte g, byte b, byte a) => new(r, g, b, a); /// /// Creates a from RGB bytes. @@ -91,7 +81,17 @@ namespace SixLabors.ImageSharp /// The blue component (0-255). /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgb(byte r, byte g, byte b) => new Color(r, g, b); + public static Color FromRgb(byte r, byte g, byte b) => new(r, g, b); + + /// + /// Creates a from the given . + /// + /// The pixel to convert from. + /// The pixel format. + /// The . 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static Color FromPixel(TPixel pixel) + where TPixel : unmanaged, IPixel => new(pixel.ToScaledVector4()); /// /// Creates a new instance of the struct @@ -207,13 +207,18 @@ namespace SixLabors.ImageSharp /// /// A hexadecimal string representation of the value. [MethodImpl(InliningOptions.ShortMethod)] - public string ToHex() => this.data.ToRgba32().ToHex(); + public string ToHex() + { + Rgba32 rgba = default; + this.data.ToRgba32(ref rgba); + return rgba.ToHex(); + } /// public override string ToString() => this.ToHex(); /// - /// Converts the color instance to a specified type. + /// Converts the color instance to a specified type. /// /// The pixel type to convert to. /// The pixel value. @@ -222,12 +227,12 @@ namespace SixLabors.ImageSharp where TPixel : unmanaged, IPixel { TPixel pixel = default; - pixel.FromRgba64(this.data); + pixel.FromScaledVector4(this.data.ToScaledVector4()); return pixel; } /// - /// Bulk converts a span of to a span of a specified type. + /// Bulk converts a span of to a span of a specified type. /// /// The pixel type to convert to. /// The configuration. 
@@ -240,28 +245,19 @@ namespace SixLabors.ImageSharp Span destination) where TPixel : unmanaged, IPixel { - ReadOnlySpan rgba64Span = MemoryMarshal.Cast(source); - PixelOperations.Instance.FromRgba64(configuration, rgba64Span, destination); + ReadOnlySpan rgbaSpan = MemoryMarshal.Cast(source); + PixelOperations.Instance.From(configuration, rgbaSpan, destination); } /// [MethodImpl(InliningOptions.ShortMethod)] - public bool Equals(Color other) - { - return this.data.PackedValue == other.data.PackedValue; - } + public bool Equals(Color other) => this.data.Equals(other.data); /// - public override bool Equals(object obj) - { - return obj is Color other && this.Equals(other); - } + public override bool Equals(object obj) => obj is Color other && this.Equals(other); /// [MethodImpl(InliningOptions.ShortMethod)] - public override int GetHashCode() - { - return this.data.PackedValue.GetHashCode(); - } + public override int GetHashCode() => this.data.GetHashCode(); } } diff --git a/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs b/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs index 38b94f486..356ef7351 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs @@ -66,7 +66,7 @@ namespace SixLabors.ImageSharp.Tests [Fact] public void Rgb24() { - var source = new Rgb24(1, 22, 231); + var source = new Rgb24(1, 22, 231); // Act: Color color = source; @@ -79,7 +79,7 @@ namespace SixLabors.ImageSharp.Tests [Fact] public void Bgr24() { - var source = new Bgr24(1, 22, 231); + var source = new Bgr24(1, 22, 231); // Act: Color color = source; @@ -88,6 +88,19 @@ namespace SixLabors.ImageSharp.Tests Bgr24 data = color.ToPixel(); Assert.Equal(source, data); } + + [Fact] + public void TPixel() + { + var source = new RgbaVector(1, .1F, .133F, .864F); + + // Act: + var color = Color.FromPixel(source); + + // Assert: + RgbaVector data = color.ToPixel(); + Assert.Equal(source, data); + } } } } diff --git 
a/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs b/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs index 89276014b..dd51f3a6c 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs @@ -66,7 +66,7 @@ namespace SixLabors.ImageSharp.Tests [Fact] public void Rgb24() { - var source = new Rgb24(1, 22, 231); + var source = new Rgb24(1, 22, 231); // Act: var color = new Color(source); @@ -79,7 +79,7 @@ namespace SixLabors.ImageSharp.Tests [Fact] public void Bgr24() { - var source = new Bgr24(1, 22, 231); + var source = new Bgr24(1, 22, 231); // Act: var color = new Color(source); From ef90575a119335314ea69c4cbd556469d91f032f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 1 Nov 2021 21:42:32 +1100 Subject: [PATCH 02/36] Revert "Use RgbaVector for color backing" This reverts commit 257ff1929e341e5b1af94d9adf557e5296ece957. --- src/ImageSharp/Color/Color.Conversions.cs | 87 +++---------------- src/ImageSharp/Color/Color.cs | 74 ++++++++-------- .../Color/ColorTests.CastFrom.cs | 17 +--- .../Color/ColorTests.ConstructFrom.cs | 4 +- 4 files changed, 57 insertions(+), 125 deletions(-) diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs index abcb54b80..0455fd26a 100644 --- a/src/ImageSharp/Color/Color.Conversions.cs +++ b/src/ImageSharp/Color/Color.Conversions.cs @@ -17,90 +17,56 @@ namespace SixLabors.ImageSharp /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba64 pixel) - { - RgbaVector vector = default; - vector.FromRgba64(pixel); - this.data = vector; - } + public Color(Rgba64 pixel) => this.data = pixel; /// /// Initializes a new instance of the struct. /// /// The containing the color information. 
[MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba32 pixel) - { - RgbaVector vector = default; - vector.FromRgba32(pixel); - this.data = vector; - } + public Color(Rgba32 pixel) => this.data = new Rgba64(pixel); /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Argb32 pixel) - { - RgbaVector vector = default; - vector.FromArgb32(pixel); - this.data = vector; - } + public Color(Argb32 pixel) => this.data = new Rgba64(pixel); /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgra32 pixel) - { - RgbaVector vector = default; - vector.FromBgra32(pixel); - this.data = vector; - } + public Color(Bgra32 pixel) => this.data = new Rgba64(pixel); /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgb24 pixel) - { - RgbaVector vector = default; - vector.FromRgb24(pixel); - this.data = vector; - } + public Color(Rgb24 pixel) => this.data = new Rgba64(pixel); /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgr24 pixel) - { - RgbaVector vector = default; - vector.FromBgr24(pixel); - this.data = vector; - } + public Color(Bgr24 pixel) => this.data = new Rgba64(pixel); /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Vector4 vector) - { - vector = Numerics.Clamp(vector, Vector4.Zero, Vector4.One); - this.data = new RgbaVector(vector.X, vector.Y, vector.Z, vector.W); - } + public Color(Vector4 vector) => this.data = new Rgba64(vector); /// /// Converts a to . /// /// The . /// The . 
- public static explicit operator Vector4(Color color) => color.data.ToScaledVector4(); + public static explicit operator Vector4(Color color) => color.data.ToVector4(); /// /// Converts an to . @@ -108,47 +74,22 @@ namespace SixLabors.ImageSharp /// The . /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static explicit operator Color(Vector4 source) => new(source); + public static explicit operator Color(Vector4 source) => new Color(source); [MethodImpl(InliningOptions.ShortMethod)] - internal Rgba32 ToRgba32() - { - Rgba32 result = default; - result.FromScaledVector4(this.data.ToScaledVector4()); - return result; - } + internal Rgba32 ToRgba32() => this.data.ToRgba32(); [MethodImpl(InliningOptions.ShortMethod)] - internal Bgra32 ToBgra32() - { - Bgra32 result = default; - result.FromScaledVector4(this.data.ToScaledVector4()); - return result; - } + internal Bgra32 ToBgra32() => this.data.ToBgra32(); [MethodImpl(InliningOptions.ShortMethod)] - internal Argb32 ToArgb32() - { - Argb32 result = default; - result.FromScaledVector4(this.data.ToScaledVector4()); - return result; - } + internal Argb32 ToArgb32() => this.data.ToArgb32(); [MethodImpl(InliningOptions.ShortMethod)] - internal Rgb24 ToRgb24() - { - Rgb24 result = default; - result.FromScaledVector4(this.data.ToScaledVector4()); - return result; - } + internal Rgb24 ToRgb24() => this.data.ToRgb24(); [MethodImpl(InliningOptions.ShortMethod)] - internal Bgr24 ToBgr24() - { - Bgr24 result = default; - result.FromScaledVector4(this.data.ToScaledVector4()); - return result; - } + internal Bgr24 ToBgr24() => this.data.ToBgr24(); [MethodImpl(InliningOptions.ShortMethod)] internal Vector4 ToVector4() => this.data.ToVector4(); diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index 9a4df4e62..d5eedc160 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -20,22 +20,26 @@ namespace SixLabors.ImageSharp /// public readonly partial struct Color : IEquatable 
{ - private readonly RgbaVector data; + private readonly Rgba64 data; [MethodImpl(InliningOptions.ShortMethod)] private Color(byte r, byte g, byte b, byte a) { - RgbaVector vector = default; - vector.FromRgba32(new(r, g, b, a)); - this.data = vector; + this.data = new Rgba64( + ColorNumerics.UpscaleFrom8BitTo16Bit(r), + ColorNumerics.UpscaleFrom8BitTo16Bit(g), + ColorNumerics.UpscaleFrom8BitTo16Bit(b), + ColorNumerics.UpscaleFrom8BitTo16Bit(a)); } [MethodImpl(InliningOptions.ShortMethod)] private Color(byte r, byte g, byte b) { - RgbaVector vector = default; - vector.FromRgba32(new(r, g, b)); - this.data = vector; + this.data = new Rgba64( + ColorNumerics.UpscaleFrom8BitTo16Bit(r), + ColorNumerics.UpscaleFrom8BitTo16Bit(g), + ColorNumerics.UpscaleFrom8BitTo16Bit(b), + ushort.MaxValue); } /// @@ -48,7 +52,10 @@ namespace SixLabors.ImageSharp /// otherwise, false. /// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator ==(Color left, Color right) => left.Equals(right); + public static bool operator ==(Color left, Color right) + { + return left.Equals(right); + } /// /// Checks whether two structures are equal. @@ -60,7 +67,10 @@ namespace SixLabors.ImageSharp /// otherwise, false. /// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator !=(Color left, Color right) => !left.Equals(right); + public static bool operator !=(Color left, Color right) + { + return !left.Equals(right); + } /// /// Creates a from RGBA bytes. @@ -71,7 +81,7 @@ namespace SixLabors.ImageSharp /// The alpha component (0-255). /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgba(byte r, byte g, byte b, byte a) => new(r, g, b, a); + public static Color FromRgba(byte r, byte g, byte b, byte a) => new Color(r, g, b, a); /// /// Creates a from RGB bytes. @@ -81,17 +91,7 @@ namespace SixLabors.ImageSharp /// The blue component (0-255). /// The . 
[MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgb(byte r, byte g, byte b) => new(r, g, b); - - /// - /// Creates a from the given . - /// - /// The pixel to convert from. - /// The pixel format. - /// The . - [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromPixel(TPixel pixel) - where TPixel : unmanaged, IPixel => new(pixel.ToScaledVector4()); + public static Color FromRgb(byte r, byte g, byte b) => new Color(r, g, b); /// /// Creates a new instance of the struct @@ -207,18 +207,13 @@ namespace SixLabors.ImageSharp /// /// A hexadecimal string representation of the value. [MethodImpl(InliningOptions.ShortMethod)] - public string ToHex() - { - Rgba32 rgba = default; - this.data.ToRgba32(ref rgba); - return rgba.ToHex(); - } + public string ToHex() => this.data.ToRgba32().ToHex(); /// public override string ToString() => this.ToHex(); /// - /// Converts the color instance to a specified type. + /// Converts the color instance to a specified type. /// /// The pixel type to convert to. /// The pixel value. @@ -227,12 +222,12 @@ namespace SixLabors.ImageSharp where TPixel : unmanaged, IPixel { TPixel pixel = default; - pixel.FromScaledVector4(this.data.ToScaledVector4()); + pixel.FromRgba64(this.data); return pixel; } /// - /// Bulk converts a span of to a span of a specified type. + /// Bulk converts a span of to a span of a specified type. /// /// The pixel type to convert to. /// The configuration. 
@@ -245,19 +240,28 @@ namespace SixLabors.ImageSharp Span destination) where TPixel : unmanaged, IPixel { - ReadOnlySpan rgbaSpan = MemoryMarshal.Cast(source); - PixelOperations.Instance.From(configuration, rgbaSpan, destination); + ReadOnlySpan rgba64Span = MemoryMarshal.Cast(source); + PixelOperations.Instance.FromRgba64(configuration, rgba64Span, destination); } /// [MethodImpl(InliningOptions.ShortMethod)] - public bool Equals(Color other) => this.data.Equals(other.data); + public bool Equals(Color other) + { + return this.data.PackedValue == other.data.PackedValue; + } /// - public override bool Equals(object obj) => obj is Color other && this.Equals(other); + public override bool Equals(object obj) + { + return obj is Color other && this.Equals(other); + } /// [MethodImpl(InliningOptions.ShortMethod)] - public override int GetHashCode() => this.data.GetHashCode(); + public override int GetHashCode() + { + return this.data.PackedValue.GetHashCode(); + } } } diff --git a/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs b/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs index 356ef7351..38b94f486 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.CastFrom.cs @@ -66,7 +66,7 @@ namespace SixLabors.ImageSharp.Tests [Fact] public void Rgb24() { - var source = new Rgb24(1, 22, 231); + var source = new Rgb24(1, 22, 231); // Act: Color color = source; @@ -79,7 +79,7 @@ namespace SixLabors.ImageSharp.Tests [Fact] public void Bgr24() { - var source = new Bgr24(1, 22, 231); + var source = new Bgr24(1, 22, 231); // Act: Color color = source; @@ -88,19 +88,6 @@ namespace SixLabors.ImageSharp.Tests Bgr24 data = color.ToPixel(); Assert.Equal(source, data); } - - [Fact] - public void TPixel() - { - var source = new RgbaVector(1, .1F, .133F, .864F); - - // Act: - var color = Color.FromPixel(source); - - // Assert: - RgbaVector data = color.ToPixel(); - Assert.Equal(source, data); - } } } } diff --git 
a/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs b/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs index dd51f3a6c..89276014b 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.ConstructFrom.cs @@ -66,7 +66,7 @@ namespace SixLabors.ImageSharp.Tests [Fact] public void Rgb24() { - var source = new Rgb24(1, 22, 231); + var source = new Rgb24(1, 22, 231); // Act: var color = new Color(source); @@ -79,7 +79,7 @@ namespace SixLabors.ImageSharp.Tests [Fact] public void Bgr24() { - var source = new Bgr24(1, 22, 231); + var source = new Bgr24(1, 22, 231); // Act: var color = new Color(source); From 2ec17e7c6a31b31fafb75cfd85613681fa4125d6 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 1 Nov 2021 22:39:20 +1100 Subject: [PATCH 03/36] Use box pixel for high precision --- src/ImageSharp/Color/Color.Conversions.cs | 117 +++++++++++++++--- src/ImageSharp/Color/Color.cs | 77 ++++++++---- .../Color/ColorTests.CastTo.cs | 17 ++- 3 files changed, 171 insertions(+), 40 deletions(-) diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs index 0455fd26a..424b7dcdf 100644 --- a/src/ImageSharp/Color/Color.Conversions.cs +++ b/src/ImageSharp/Color/Color.Conversions.cs @@ -17,56 +17,85 @@ namespace SixLabors.ImageSharp /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba64 pixel) => this.data = pixel; + public Color(Rgba64 pixel) + { + this.data = pixel; + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgba32 pixel) => this.data = new Rgba64(pixel); + public Color(Rgba32 pixel) + { + this.data = new Rgba64(pixel); + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. 
[MethodImpl(InliningOptions.ShortMethod)] - public Color(Argb32 pixel) => this.data = new Rgba64(pixel); + public Color(Argb32 pixel) + { + this.data = new Rgba64(pixel); + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgra32 pixel) => this.data = new Rgba64(pixel); + public Color(Bgra32 pixel) + { + this.data = new Rgba64(pixel); + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Rgb24 pixel) => this.data = new Rgba64(pixel); + public Color(Rgb24 pixel) + { + this.data = new Rgba64(pixel); + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Bgr24 pixel) => this.data = new Rgba64(pixel); + public Color(Bgr24 pixel) + { + this.data = new Rgba64(pixel); + this.boxedHighPrecisionPixel = null; + } /// /// Initializes a new instance of the struct. /// /// The containing the color information. [MethodImpl(InliningOptions.ShortMethod)] - public Color(Vector4 vector) => this.data = new Rgba64(vector); + public Color(Vector4 vector) + { + vector = Numerics.Clamp(vector, Vector4.Zero, Vector4.One); + this.boxedHighPrecisionPixel = new RgbaVector(vector.X, vector.Y, vector.Z, vector.W); + this.data = default; + } /// /// Converts a to . /// /// The . /// The . - public static explicit operator Vector4(Color color) => color.data.ToVector4(); + public static explicit operator Vector4(Color color) => color.ToVector4(); /// /// Converts an to . @@ -74,24 +103,82 @@ namespace SixLabors.ImageSharp /// The . /// The . 
[MethodImpl(InliningOptions.ShortMethod)] - public static explicit operator Color(Vector4 source) => new Color(source); + public static explicit operator Color(Vector4 source) => new(source); [MethodImpl(InliningOptions.ShortMethod)] - internal Rgba32 ToRgba32() => this.data.ToRgba32(); + internal Rgba32 ToRgba32() + { + if (this.boxedHighPrecisionPixel is null) + { + return this.data.ToRgba32(); + } + + Rgba32 value = default; + this.boxedHighPrecisionPixel.ToRgba32(ref value); + return value; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Bgra32 ToBgra32() => this.data.ToBgra32(); + internal Bgra32 ToBgra32() + { + if (this.boxedHighPrecisionPixel is null) + { + return this.data.ToBgra32(); + } + + Bgra32 value = default; + value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4()); + return value; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Argb32 ToArgb32() => this.data.ToArgb32(); + internal Argb32 ToArgb32() + { + if (this.boxedHighPrecisionPixel is null) + { + return this.data.ToArgb32(); + } + + Argb32 value = default; + value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4()); + return value; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Rgb24 ToRgb24() => this.data.ToRgb24(); + internal Rgb24 ToRgb24() + { + if (this.boxedHighPrecisionPixel is null) + { + return this.data.ToRgb24(); + } + + Rgb24 value = default; + value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4()); + return value; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Bgr24 ToBgr24() => this.data.ToBgr24(); + internal Bgr24 ToBgr24() + { + if (this.boxedHighPrecisionPixel is null) + { + return this.data.ToBgr24(); + } + + Bgr24 value = default; + value.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4()); + return value; + } [MethodImpl(InliningOptions.ShortMethod)] - internal Vector4 ToVector4() => this.data.ToVector4(); + internal Vector4 ToVector4() + { + if (this.boxedHighPrecisionPixel 
is null) + { + return this.data.ToScaledVector4(); + } + + return this.boxedHighPrecisionPixel.ToScaledVector4(); + } } } diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index d5eedc160..fe66efcfb 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -4,7 +4,6 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp @@ -21,6 +20,7 @@ namespace SixLabors.ImageSharp public readonly partial struct Color : IEquatable { private readonly Rgba64 data; + private readonly IPixel boxedHighPrecisionPixel; [MethodImpl(InliningOptions.ShortMethod)] private Color(byte r, byte g, byte b, byte a) @@ -30,6 +30,8 @@ namespace SixLabors.ImageSharp ColorNumerics.UpscaleFrom8BitTo16Bit(g), ColorNumerics.UpscaleFrom8BitTo16Bit(b), ColorNumerics.UpscaleFrom8BitTo16Bit(a)); + + this.boxedHighPrecisionPixel = null; } [MethodImpl(InliningOptions.ShortMethod)] @@ -40,6 +42,15 @@ namespace SixLabors.ImageSharp ColorNumerics.UpscaleFrom8BitTo16Bit(g), ColorNumerics.UpscaleFrom8BitTo16Bit(b), ushort.MaxValue); + + this.boxedHighPrecisionPixel = null; + } + + [MethodImpl(InliningOptions.ShortMethod)] + private Color(IPixel pixel) + { + this.boxedHighPrecisionPixel = pixel; + this.data = default; } /// @@ -52,13 +63,10 @@ namespace SixLabors.ImageSharp /// otherwise, false. /// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator ==(Color left, Color right) - { - return left.Equals(right); - } + public static bool operator ==(Color left, Color right) => left.Equals(right); /// - /// Checks whether two structures are equal. + /// Checks whether two structures are not equal. /// /// The left hand operand. /// The right hand operand. @@ -67,10 +75,7 @@ namespace SixLabors.ImageSharp /// otherwise, false. 
/// [MethodImpl(InliningOptions.ShortMethod)] - public static bool operator !=(Color left, Color right) - { - return !left.Equals(right); - } + public static bool operator !=(Color left, Color right) => !left.Equals(right); /// /// Creates a from RGBA bytes. @@ -81,7 +86,7 @@ namespace SixLabors.ImageSharp /// The alpha component (0-255). /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgba(byte r, byte g, byte b, byte a) => new Color(r, g, b, a); + public static Color FromRgba(byte r, byte g, byte b, byte a) => new(r, g, b, a); /// /// Creates a from RGB bytes. @@ -91,7 +96,18 @@ namespace SixLabors.ImageSharp /// The blue component (0-255). /// The . [MethodImpl(InliningOptions.ShortMethod)] - public static Color FromRgb(byte r, byte g, byte b) => new Color(r, g, b); + public static Color FromRgb(byte r, byte g, byte b) => new(r, g, b); + + /// + /// Creates a from the given . + /// + /// The pixel to convert from. + /// The pixel format. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Color FromPixel(TPixel pixel) + where TPixel : unmanaged, IPixel + => new(pixel); /// /// Creates a new instance of the struct @@ -213,7 +229,7 @@ namespace SixLabors.ImageSharp public override string ToString() => this.ToHex(); /// - /// Converts the color instance to a specified type. + /// Converts the color instance to a specified type. /// /// The pixel type to convert to. /// The pixel value. @@ -221,13 +237,18 @@ namespace SixLabors.ImageSharp public TPixel ToPixel() where TPixel : unmanaged, IPixel { - TPixel pixel = default; + if (this.boxedHighPrecisionPixel is TPixel pixel) + { + return pixel; + } + + pixel = default; pixel.FromRgba64(this.data); return pixel; } /// - /// Bulk converts a span of to a span of a specified type. + /// Bulk converts a span of to a span of a specified type. /// /// The pixel type to convert to. /// The configuration. 
@@ -240,28 +261,38 @@ namespace SixLabors.ImageSharp Span destination) where TPixel : unmanaged, IPixel { - ReadOnlySpan rgba64Span = MemoryMarshal.Cast(source); - PixelOperations.Instance.FromRgba64(configuration, rgba64Span, destination); + Guard.DestinationShouldNotBeTooShort(source, destination, nameof(destination)); + for (int i = 0; i < source.Length; i++) + { + destination[i] = source[i].ToPixel(); + } } /// [MethodImpl(InliningOptions.ShortMethod)] public bool Equals(Color other) { - return this.data.PackedValue == other.data.PackedValue; + if (this.boxedHighPrecisionPixel is null && other.boxedHighPrecisionPixel is null) + { + return this.data.PackedValue == other.data.PackedValue; + } + + return this.ToVector4().Equals(other.ToVector4()); } /// - public override bool Equals(object obj) - { - return obj is Color other && this.Equals(other); - } + public override bool Equals(object obj) => obj is Color other && this.Equals(other); /// [MethodImpl(InliningOptions.ShortMethod)] public override int GetHashCode() { - return this.data.PackedValue.GetHashCode(); + if (this.boxedHighPrecisionPixel is null) + { + return this.data.PackedValue.GetHashCode(); + } + + return this.boxedHighPrecisionPixel.GetHashCode(); } } } diff --git a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs index ee1820de7..d3f3cf126 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs @@ -66,7 +66,7 @@ namespace SixLabors.ImageSharp.Tests [Fact] public void Rgb24() { - var source = new Rgb24(1, 22, 231); + var source = new Rgb24(1, 22, 231); // Act: var color = new Color(source); @@ -79,7 +79,7 @@ namespace SixLabors.ImageSharp.Tests [Fact] public void Bgr24() { - var source = new Bgr24(1, 22, 231); + var source = new Bgr24(1, 22, 231); // Act: var color = new Color(source); @@ -88,6 +88,19 @@ namespace SixLabors.ImageSharp.Tests Bgr24 data = color; Assert.Equal(source, 
data); } + + [Fact] + public void TPixel() + { + var source = new RgbaVector(1, .1F, .133F, .864F); + + // Act: + var color = Color.FromPixel(source); + + // Assert: + RgbaVector data = color.ToPixel(); + Assert.Equal(source, data); + } } } } From fd07436736d721bedfbafc308d902aa1e7765778 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 12:40:04 +0100 Subject: [PATCH 04/36] Replace Guard with DebugGuard in FastSLog2Slow --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 22c233360..ebebe7954 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -780,7 +780,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static float FastSLog2Slow(uint v) { - Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); + DebugGuard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); if (v < ApproxLogWithCorrectionMax) { int logCnt = 0; From 2bf16bcb58556d6f3cbee5298472db42af60bd02 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 12:41:43 +0100 Subject: [PATCH 05/36] Reverse access to output array to remove bounds checks --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index ebebe7954..b278b12bc 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1262,11 +1262,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Vector128 pb = Sse2.UnpackLow(bc, Vector128.Zero); // |b - c| Vector128 diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16()); Sse2.Store((ushort*)p, diff); + int paMinusPb = output[3] + 
output[2] + output[1] + output[0]; + return (paMinusPb <= 0) ? a : b; } - - int paMinusPb = output[0] + output[1] + output[2] + output[3]; - - return (paMinusPb <= 0) ? a : b; } else #endif From a7ed1884e0f9439c03d913f4d4a5f2b36d38071e Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 14:15:13 +0100 Subject: [PATCH 06/36] Add sse2 version of ClampedAddSubtractHalf --- .../Formats/Webp/Lossless/LosslessUtils.cs | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index b278b12bc..0dda5a79a 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1219,12 +1219,32 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2) { - uint ave = Average2(c0, c1); - int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24)); - int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff)); - int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff)); - int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff)); - return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); + Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); + Vector128 b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); + Vector128 avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16()); + Vector128 a0 = Sse2.ShiftRightLogical(avg, 1); + Vector128 a1 = Sse2.Subtract(a0, b0.AsInt16()); + Vector128 bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16()); + Vector128 a2 = Sse2.Subtract(a1, bgta); + 
Vector128 a3 = Sse2.ShiftRightArithmetic(a2.AsInt16(), 1); + Vector128 a4 = Sse2.Add(a0.AsInt16(), a3).AsInt16(); + Vector128 a5 = Sse2.PackUnsignedSaturate(a4, a4); + uint output = Sse2.ConvertToUInt32(a5.AsUInt32()); + return output; + } +#endif + { + uint ave = Average2(c0, c1); + int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24)); + int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff)); + int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff)); + int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff)); + return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; + } } [MethodImpl(InliningOptions.ShortMethod)] From 28053739a9beeed006fd256a0ea8016631660841 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 14:20:33 +0100 Subject: [PATCH 07/36] Add sse2 version of ClampedAddSubtractFull --- .../Formats/Webp/Lossless/LosslessUtils.cs | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 0dda5a79a..7740dc051 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1201,20 +1201,34 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2) { - int a = AddSubtractComponentFull( - (int)(c0 >> 24), - (int)(c1 >> 24), - (int)(c2 >> 24)); - int r = AddSubtractComponentFull( - (int)((c0 >> 16) & 0xff), - (int)((c1 >> 16) & 0xff), - (int)((c2 >> 16) & 0xff)); - int g = AddSubtractComponentFull( - (int)((c0 >> 8) & 0xff), - (int)((c1 >> 8) & 0xff), - (int)((c2 >> 8) & 0xff)); - int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff)); - return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; +#if SUPPORTS_RUNTIME_INTRINSICS + if 
(Sse2.IsSupported) + { + Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); + Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); + Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); + Vector128 v1 = Sse2.Add(c0Vec, c1Vec); + Vector128 v2 = Sse2.Subtract(v1, c2Vec); + Vector128 b = Sse2.PackUnsignedSaturate(v2.AsInt16(), v2.AsInt16()); + uint output = Sse2.ConvertToUInt32(b.AsUInt32()); + } +#endif + { + int a = AddSubtractComponentFull( + (int)(c0 >> 24), + (int)(c1 >> 24), + (int)(c2 >> 24)); + int r = AddSubtractComponentFull( + (int)((c0 >> 16) & 0xff), + (int)((c1 >> 16) & 0xff), + (int)((c2 >> 16) & 0xff)); + int g = AddSubtractComponentFull( + (int)((c0 >> 8) & 0xff), + (int)((c1 >> 8) & 0xff), + (int)((c2 >> 8) & 0xff)); + int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff)); + return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; + } } private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2) From f6dbc7dd8ee95115315805dab2b9b38684e505b2 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 14:40:59 +0100 Subject: [PATCH 08/36] Fix issue in ClampedAddSubtractFull --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 7740dc051..65b39bd2d 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1207,10 +1207,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); Vector128 c2Vec = 
Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); - Vector128 v1 = Sse2.Add(c0Vec, c1Vec); - Vector128 v2 = Sse2.Subtract(v1, c2Vec); + Vector128 v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16()); + Vector128 v2 = Sse2.Subtract(v1, c2Vec.AsInt16()); Vector128 b = Sse2.PackUnsignedSaturate(v2.AsInt16(), v2.AsInt16()); uint output = Sse2.ConvertToUInt32(b.AsUInt32()); + return output; } #endif { From 8fe280e9918e14ca2abb7ffd21ae35c969429447 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 16:04:29 +0100 Subject: [PATCH 09/36] Add predictor 12 and 13 tests --- .../Formats/WebP/LosslessUtilsTests.cs | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs index bf381ebda..c70f332ef 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs @@ -153,9 +153,55 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp } } + private static void RunPredictor12Test() + { + // arrange + uint[] topData = { 4294844413, 4294779388 }; + uint left = 4294844413; + uint expectedResult = 4294779388; + + // act + unsafe + { + fixed (uint* top = &topData[1]) + { + uint actual = LosslessUtils.Predictor12(left, top); + + // assert + Assert.Equal(expectedResult, actual); + } + } + } + + private static void RunPredictor13Test() + { + // arrange + uint[] topData = { 4278193922, 4278193666 }; + uint left = 4278193410; + uint expectedResult = 4278193154; + + // act + unsafe + { + fixed (uint* top = &topData[1]) + { + uint actual = LosslessUtils.Predictor13(left, top); + + // assert + Assert.Equal(expectedResult, actual); + } + } + } + [Fact] public void Predictor11_Works() => RunPredictor11Test(); + [Fact] + public void Predictor12_Works() => RunPredictor12Test(); + + [Fact] + public void Predictor13_Works() => RunPredictor13Test(); + [Fact] public 
void SubtractGreen_Works() => RunSubtractGreenTest(); @@ -175,6 +221,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp [Fact] public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2); + [Fact] + public void Predictor12_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.AllowAll); + + [Fact] + public void Predictor12_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.DisableSSE2); + + [Fact] + public void Predictor13_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.AllowAll); + + [Fact] + public void Predictor13_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.DisableSSE2); + [Fact] public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll); From ffdf99bad2d8f4fb9d52a3938f3c64d750f09957 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 16:29:52 +0100 Subject: [PATCH 10/36] Add aggressive inlining --- src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs | 8 ++++++++ src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 1 + 2 files changed, 9 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs index 8596d8555..02bbc38fc 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs @@ -1,6 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Webp.Lossless { /// @@ -41,6 +43,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// Inserts a new color into the cache. /// /// The color to insert. 
+ [MethodImpl(InliningOptions.ShortMethod)] public void Insert(uint bgra) { int key = HashPix(bgra, this.HashShift); @@ -52,6 +55,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// The key to lookup. /// The color for the key. + [MethodImpl(InliningOptions.ShortMethod)] public uint Lookup(int key) => this.Colors[key]; /// @@ -59,6 +63,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// The color to check. /// The index of the color in the cache or -1 if its not present. + [MethodImpl(InliningOptions.ShortMethod)] public int Contains(uint bgra) { int key = HashPix(bgra, this.HashShift); @@ -70,6 +75,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// The color. /// The index for the color. + [MethodImpl(InliningOptions.ShortMethod)] public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift); /// @@ -77,8 +83,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// The key. /// The color to add. + [MethodImpl(InliningOptions.ShortMethod)] public void Set(uint key, uint bgra) => this.Colors[key] = bgra; + [MethodImpl(InliningOptions.ShortMethod)] public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift); } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 65b39bd2d..9baa6c3c3 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -752,6 +752,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// Fast calculation of log2(v) for integer input. /// + [MethodImpl(InliningOptions.ShortMethod)] public static float FastLog2(uint v) => v < LogLookupIdxMax ? 
WebpLookupTables.Log2Table[v] : FastLog2Slow(v); /// From fc8d8b81d98201955655595fe682a0c5533eb6ea Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 21:56:19 +0100 Subject: [PATCH 11/36] Remove unnecessary cast AsInt16() --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 9baa6c3c3..8bd3163cc 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1210,7 +1210,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); Vector128 v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16()); Vector128 v2 = Sse2.Subtract(v1, c2Vec.AsInt16()); - Vector128 b = Sse2.PackUnsignedSaturate(v2.AsInt16(), v2.AsInt16()); + Vector128 b = Sse2.PackUnsignedSaturate(v2, v2); uint output = Sse2.ConvertToUInt32(b.AsUInt32()); return output; } From f9212f7adca384b1147af10a38e3ec0d8dcc12d2 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Nov 2021 22:38:52 +1100 Subject: [PATCH 12/36] Update tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs Co-authored-by: Anton Firszov --- tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs index d3f3cf126..af35d1f89 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs @@ -92,7 +92,7 @@ namespace SixLabors.ImageSharp.Tests [Fact] public void TPixel() { - var source = new RgbaVector(1, .1F, .133F, .864F); + var source = new RgbaVector(float.Epsilon, 2 * float.Epsilon, float.MaxValue, float.MinValue); // Act: var color = Color.FromPixel(source); From 
425600459e96cc5d34857fd9e0de45952fa8e6ae Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Nov 2021 23:49:32 +1100 Subject: [PATCH 13/36] Update Color.Equals --- src/ImageSharp/Color/Color.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index fe66efcfb..61d6c8e6d 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -277,7 +277,7 @@ namespace SixLabors.ImageSharp return this.data.PackedValue == other.data.PackedValue; } - return this.ToVector4().Equals(other.ToVector4()); + return this.boxedHighPrecisionPixel?.Equals(other.boxedHighPrecisionPixel) == true; } /// From 08785103e350266f626b3519b22e3966b4450caa Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 4 Nov 2021 12:39:42 +0100 Subject: [PATCH 14/36] Add EntropyPasses default value explicit to 1 --- src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs | 1 + src/ImageSharp/Formats/Webp/WebpEncoder.cs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs index 7dbf49d45..000de4f88 100644 --- a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs +++ b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs @@ -35,6 +35,7 @@ namespace SixLabors.ImageSharp.Formats.Webp /// /// Gets the number of entropy-analysis passes (in [1..10]). + /// Defaults to 1. 
/// int EntropyPasses { get; } diff --git a/src/ImageSharp/Formats/Webp/WebpEncoder.cs b/src/ImageSharp/Formats/Webp/WebpEncoder.cs index f85f65b63..bdcbb194b 100644 --- a/src/ImageSharp/Formats/Webp/WebpEncoder.cs +++ b/src/ImageSharp/Formats/Webp/WebpEncoder.cs @@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Formats.Webp public bool UseAlphaCompression { get; set; } /// - public int EntropyPasses { get; set; } + public int EntropyPasses { get; set; } = 1; /// public int SpatialNoiseShaping { get; set; } = 50; From 947dc8d5ecff64414247ede191452cf8c7a77c26 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 4 Nov 2021 12:40:39 +0100 Subject: [PATCH 15/36] Make sure magick.net and imagesharp use the same configuration --- .../Codecs/EncodeWebp.cs | 45 ++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs index 7d3dfe693..59814f465 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs @@ -4,6 +4,7 @@ using System.IO; using BenchmarkDotNet.Attributes; using ImageMagick; +using ImageMagick.Formats; using SixLabors.ImageSharp.Formats.Webp; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Tests; @@ -44,8 +45,22 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs public void MagickWebpLossy() { using var memoryStream = new MemoryStream(); - this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "lossless", false); - this.webpMagick.Write(memoryStream, MagickFormat.WebP); + + var defines = new WebPWriteDefines + { + Lossless = false, + Method = 4, + AlphaCompression = WebPAlphaCompression.None, + FilterStrength = 60, + SnsStrength = 50, + Pass = 1, + + // 100 means off. 
+ NearLossless = 100 + }; + + this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "quality", 75); + this.webpMagick.Write(memoryStream, defines); } [Benchmark(Description = "ImageSharp Webp Lossy")] @@ -54,7 +69,12 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs using var memoryStream = new MemoryStream(); this.webp.Save(memoryStream, new WebpEncoder() { - FileFormat = WebpFileFormatType.Lossy + FileFormat = WebpFileFormatType.Lossy, + Method = WebpEncodingMethod.Level4, + UseAlphaCompression = false, + FilterStrength = 60, + SpatialNoiseShaping = 50, + EntropyPasses = 1 }); } @@ -62,8 +82,18 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs public void MagickWebpLossless() { using var memoryStream = new MemoryStream(); - this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "lossless", true); - this.webpMagick.Write(memoryStream, MagickFormat.WebP); + var defines = new WebPWriteDefines + { + Lossless = true, + Method = 4, + + // 100 means off. + NearLossless = 100 + }; + + this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "exact", false); + this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "quality", 75); + this.webpMagick.Write(memoryStream, defines); } [Benchmark(Description = "ImageSharp Webp Lossless")] @@ -72,7 +102,10 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs using var memoryStream = new MemoryStream(); this.webp.Save(memoryStream, new WebpEncoder() { - FileFormat = WebpFileFormatType.Lossless + FileFormat = WebpFileFormatType.Lossless, + Method = WebpEncodingMethod.Level4, + NearLossless = false, + TransparentColorMode = WebpTransparentColorMode.Clear }); } From 55b67ada2f659463f438303e77d0f1b1de4c47bc Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 4 Nov 2021 21:40:02 +0100 Subject: [PATCH 16/36] Use webpMagick.Quality for the quality parameter --- tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs 
b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs index 59814f465..222984992 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs @@ -59,7 +59,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs NearLossless = 100 }; - this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "quality", 75); + this.webpMagick.Quality = 75; this.webpMagick.Write(memoryStream, defines); } @@ -91,8 +91,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs NearLossless = 100 }; - this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "exact", false); - this.webpMagick.Settings.SetDefine(MagickFormat.WebP, "quality", 75); + this.webpMagick.Quality = 75; this.webpMagick.Write(memoryStream, defines); } @@ -105,6 +104,8 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs FileFormat = WebpFileFormatType.Lossless, Method = WebpEncodingMethod.Level4, NearLossless = false, + + // This is equal to exact = false in libwebp, which is the default. TransparentColorMode = WebpTransparentColorMode.Clear }); } From d6d952e477b0653b2750210ad4cd2d3fc14bbaec Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 4 Nov 2021 23:12:01 +0100 Subject: [PATCH 17/36] Remove another unnecessary cast AsInt16() --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 8bd3163cc..ee9ea5123 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1246,8 +1246,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Vector128 a1 = Sse2.Subtract(a0, b0.AsInt16()); Vector128 bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16()); Vector128 a2 = Sse2.Subtract(a1, bgta); - Vector128 a3 = Sse2.ShiftRightArithmetic(a2.AsInt16(), 1); - Vector128 a4 = Sse2.Add(a0.AsInt16(), a3).AsInt16(); + Vector128 a3 = 
Sse2.ShiftRightArithmetic(a2, 1); + Vector128 a4 = Sse2.Add(a0, a3).AsInt16(); Vector128 a5 = Sse2.PackUnsignedSaturate(a4, a4); uint output = Sse2.ConvertToUInt32(a5.AsUInt32()); return output; From 2b6dbbce6fb6561a7fbddb0bd08afe69b9349382 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 5 Nov 2021 12:46:53 +0100 Subject: [PATCH 18/36] Update benchmark results --- .../Codecs/DecodeWebp.cs | 49 ++++++++--------- .../Codecs/EncodeWebp.cs | 55 +++++++++---------- 2 files changed, 48 insertions(+), 56 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs b/tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs index 407a4ef3b..878929823 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs @@ -76,34 +76,29 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs return image.Height; } - /* Results 17.06.2021 - * BenchmarkDotNet=v0.12.0, OS=Windows 10.0.18362 + /* Results 04.11.2021 + * BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19043.1320 (21H1/May2021Update) Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores - .NET Core SDK=3.1.202 - [Host] : .NET Core 3.1.4 (CoreCLR 4.700.20.20201, CoreFX 4.700.20.22101), X64 RyuJIT - Job-AQFZAV : .NET Framework 4.8 (4.8.4180.0), X64 RyuJIT - Job-YCDAPQ : .NET Core 2.1.18 (CoreCLR 4.6.28801.04, CoreFX 4.6.28802.05), X64 RyuJIT - Job-WMTYOZ : .NET Core 3.1.4 (CoreCLR 4.700.20.20201, CoreFX 4.700.20.22101), X64 RyuJIT - - IterationCount=3 LaunchCount=1 WarmupCount=3 - | Method | Job | Runtime | TestImageLossy | TestImageLossless | Mean | Error | StdDev | Gen 0 | Gen 1 | Gen 2 | Allocated | - |--------------------------- |----------- |-------------- |---------------------- |------------------------- |-----------:|----------:|---------:|----------:|----------:|------:|------------:| - | 'Magick Lossy Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.8 ms | 6.28 ms | 0.34 ms | - | - | - | 17.65 
KB | - | 'ImageSharp Lossy Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 1,145.0 ms | 110.82 ms | 6.07 ms | - | - | - | 2779.53 KB | - | 'Magick Lossless Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 145.9 ms | 8.55 ms | 0.47 ms | - | - | - | 18.05 KB | - | 'ImageSharp Lossless Webp' | Job-IERNAB | .NET 4.7.2 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 1,694.1 ms | 55.09 ms | 3.02 ms | 4000.0000 | 1000.0000 | - | 30556.87 KB | - | 'Magick Lossy Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.7 ms | 1.89 ms | 0.10 ms | - | - | - | 15.75 KB | - | 'ImageSharp Lossy Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 741.6 ms | 21.45 ms | 1.18 ms | - | - | - | 2767.85 KB | - | 'Magick Lossless Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 146.1 ms | 9.52 ms | 0.52 ms | - | - | - | 16.54 KB | - | 'ImageSharp Lossless Webp' | Job-IMRAGJ | .NET Core 2.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 522.5 ms | 21.15 ms | 1.16 ms | 4000.0000 | 1000.0000 | - | 22860.02 KB | - | 'Magick Lossy Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.9 ms | 5.34 ms | 0.29 ms | - | - | - | 15.45 KB | - | 'ImageSharp Lossy Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 748.8 ms | 290.47 ms | 15.92 ms | - | - | - | 2767.84 KB | - | 'Magick Lossless Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 146.1 ms | 1.14 ms | 0.06 ms | - | - | - | 15.9 KB | - | 'ImageSharp Lossless Webp' | Job-NAASQX | .NET Core 3.1 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 480.7 ms | 25.25 ms | 1.38 ms | 4000.0000 | 1000.0000 | - | 22859.7 KB | - | 'Magick Lossy Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 105.7 ms | 4.71 ms | 
0.26 ms | - | - | - | 15.48 KB | - | 'ImageSharp Lossy Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 645.7 ms | 61.00 ms | 3.34 ms | - | - | - | 2768.13 KB | - | 'Magick Lossless Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 146.5 ms | 18.63 ms | 1.02 ms | - | - | - | 15.8 KB | - | 'ImageSharp Lossless Webp' | Job-GLNACU | .NET Core 5.0 | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 306.7 ms | 32.31 ms | 1.77 ms | 4000.0000 | 1000.0000 | - | 22860.02 KB | + .NET SDK=6.0.100-rc.2.21505.57 + [Host] : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT + Job-WQLXJO : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT + Job-OJJAMD : .NET Core 3.1.20 (CoreCLR 4.700.21.47003, CoreFX 4.700.21.47101), X64 RyuJIT + Job-OMFOAS : .NET Framework 4.8 (4.8.4420.0), X64 RyuJIT + + | Method | Job | Runtime | Arguments | TestImageLossy | TestImageLossless | Mean | Error | StdDev | Gen 0 | Gen 1 | Gen 2 | Allocated | + |--------------------------- |----------- |--------------------- |---------------------- |---------------------- |------------------------- |-----------:|----------:|--------:|---------:|------:|------:|----------:| + | 'Magick Lossy Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 107.9 ms | 28.91 ms | 1.58 ms | - | - | - | 25 KB | + | 'ImageSharp Lossy Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 282.3 ms | 25.40 ms | 1.39 ms | 500.0000 | - | - | 2,428 KB | + | 'Magick Lossless Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.3 ms | 11.99 ms | 0.66 ms | - | - | - | 16 KB | + | 'ImageSharp Lossless Webp' | Job-HLWZLL | .NET 5.0 | /p:DebugType=portable | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 280.2 ms | 6.21 ms | 0.34 ms | - | - | - | 2,092 KB | + | 'Magick Lossy Webp' | Job-ALQPDS | .NET Core 3.1 | Default | 
Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.2 ms | 9.32 ms | 0.51 ms | - | - | - | 15 KB | + | 'ImageSharp Lossy Webp' | Job-ALQPDS | .NET Core 3.1 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 295.8 ms | 21.25 ms | 1.16 ms | 500.0000 | - | - | 2,427 KB | + | 'Magick Lossless Webp' | Job-ALQPDS | .NET Core 3.1 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.5 ms | 4.07 ms | 0.22 ms | - | - | - | 15 KB | + | 'ImageSharp Lossless Webp' | Job-ALQPDS | .NET Core 3.1 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 464.0 ms | 55.70 ms | 3.05 ms | - | - | - | 2,090 KB | + | 'Magick Lossy Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 108.0 ms | 29.60 ms | 1.62 ms | - | - | - | 32 KB | + | 'ImageSharp Lossy Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 564.9 ms | 29.69 ms | 1.63 ms | - | - | - | 2,436 KB | + | 'Magick Lossless Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 106.2 ms | 4.74 ms | 0.26 ms | - | - | - | 18 KB | + | 'ImageSharp Lossless Webp' | Job-RYVVNN | .NET Framework 4.7.2 | Default | Webp/earth_lossy.webp | Webp/earth_lossless.webp | 1,767.5 ms | 106.33 ms | 5.83 ms | - | - | - | 9,729 KB | */ } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs index 222984992..43d8c464c 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs @@ -110,37 +110,34 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs }); } - /* Results 17.06.2021 + /* Results 04.11.2021 * Summary * - BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.630 (2004/?/20H1) + BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19043.1320 (21H1/May2021Update) Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores - .NET Core 
SDK=5.0.100 - [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - Job-OUUGWL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT - Job-GAIITM : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT - Job-HWOBSO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - - | Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - |--------------------------- |----------- |-------------- |------------- |----------:|-----------:|----------:|------:|--------:|-----------:|----------:|----------:|-------------:| - | 'Magick Webp Lossy' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 23.30 ms | 0.869 ms | 0.048 ms | 0.14 | 0.00 | - | - | - | 68.19 KB | - | 'ImageSharp Webp Lossy' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 68.22 ms | 16.454 ms | 0.902 ms | 0.42 | 0.01 | 6125.0000 | 125.0000 | - | 26359.49 KB | - | 'Magick Webp Lossless' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 161.96 ms | 9.879 ms | 0.541 ms | 1.00 | 0.00 | - | - | - | 520.28 KB | - | 'ImageSharp Webp Lossless' | Job-RYVNHD | .NET 4.7.2 | Png/Bike.png | 370.88 ms | 58.875 ms | 3.227 ms | 2.29 | 0.02 | 34000.0000 | 5000.0000 | 2000.0000 | 163177.15 KB | - | | | | | | | | | | | | | | - | 'Magick Webp Lossy' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 23.35 ms | 0.428 ms | 0.023 ms | 0.14 | 0.00 | - | - | - | 67.76 KB | - | 'ImageSharp Webp Lossy' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 43.95 ms | 2.850 ms | 0.156 ms | 0.27 | 0.00 | 6250.0000 | 250.0000 | 83.3333 | 26284.72 KB | - | 'Magick Webp Lossless' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 161.44 ms | 3.749 ms | 0.206 ms | 1.00 | 0.00 | - | - | - | 519.26 KB | - | 'ImageSharp Webp Lossless' | Job-GOZXWU | .NET Core 2.1 | Png/Bike.png | 335.78 ms | 78.666 ms | 4.312 ms | 2.08 | 0.03 | 34000.0000 | 5000.0000 | 2000.0000 | 162727.56 KB | - | | | | | | | | | | | | | | - | 'Magick Webp Lossy' | Job-VRDVKW | .NET Core 3.1 
| Png/Bike.png | 23.48 ms | 4.325 ms | 0.237 ms | 0.15 | 0.00 | - | - | - | 67.66 KB | - | 'ImageSharp Webp Lossy' | Job-VRDVKW | .NET Core 3.1 | Png/Bike.png | 43.29 ms | 16.503 ms | 0.905 ms | 0.27 | 0.01 | 6272.7273 | 272.7273 | 90.9091 | 26284.86 KB | - | 'Magick Webp Lossless' | Job-VRDVKW | .NET Core 3.1 | Png/Bike.png | 161.81 ms | 10.693 ms | 0.586 ms | 1.00 | 0.00 | - | - | - | 523.25 KB | - | 'ImageSharp Webp Lossless' | Job-VRDVKW | .NET Core 3.1 | Png/Bike.png | 323.97 ms | 235.468 ms | 12.907 ms | 2.00 | 0.08 | 34000.0000 | 5000.0000 | 2000.0000 | 162724.84 KB | - | | | | | | | | | | | | | | - | 'Magick Webp Lossy' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 23.36 ms | 0.448 ms | 0.025 ms | 0.14 | 0.00 | - | - | - | 67.66 KB | - | 'ImageSharp Webp Lossy' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 40.11 ms | 2.465 ms | 0.135 ms | 0.25 | 0.00 | 6307.6923 | 230.7692 | 76.9231 | 26284.71 KB | - | 'Magick Webp Lossless' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 161.55 ms | 6.662 ms | 0.365 ms | 1.00 | 0.00 | - | - | - | 518.84 KB | - | 'ImageSharp Webp Lossless' | Job-ZJRLRB | .NET Core 5.0 | Png/Bike.png | 298.73 ms | 17.953 ms | 0.984 ms | 1.85 | 0.01 | 34000.0000 | 5000.0000 | 2000.0000 | 162725.13 KB | + .NET SDK=6.0.100-rc.2.21505.57 + [Host] : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT + Job-WQLXJO : .NET 5.0.11 (5.0.1121.47308), X64 RyuJIT + Job-OJJAMD : .NET Core 3.1.20 (CoreCLR 4.700.21.47003, CoreFX 4.700.21.47101), X64 RyuJIT + Job-OMFOAS : .NET Framework 4.8 (4.8.4420.0), X64 RyuJIT + + IterationCount=3 LaunchCount=1 WarmupCount=3 + + | Method | Job | Runtime | Arguments | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + |--------------------------- |----------- |--------------------- |---------------------- |------------- |----------:|----------:|---------:|------:|--------:|------------:|----------:|----------:|-----------:| + | 'Magick Webp Lossy' | Job-WQLXJO | .NET 5.0 | 
/p:DebugType=portable | Png/Bike.png | 23.33 ms | 1.491 ms | 0.082 ms | 0.15 | 0.00 | - | - | - | 67 KB | + | 'ImageSharp Webp Lossy' | Job-WQLXJO | .NET 5.0 | /p:DebugType=portable | Png/Bike.png | 245.80 ms | 24.288 ms | 1.331 ms | 1.53 | 0.01 | 135000.0000 | - | - | 552,713 KB | + | 'Magick Webp Lossless' | Job-WQLXJO | .NET 5.0 | /p:DebugType=portable | Png/Bike.png | 160.36 ms | 11.131 ms | 0.610 ms | 1.00 | 0.00 | - | - | - | 518 KB | + | 'ImageSharp Webp Lossless' | Job-WQLXJO | .NET 5.0 | /p:DebugType=portable | Png/Bike.png | 313.93 ms | 45.605 ms | 2.500 ms | 1.96 | 0.01 | 34000.0000 | 5000.0000 | 2000.0000 | 161,670 KB | + | | | | | | | | | | | | | | | + | 'Magick Webp Lossy' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 23.36 ms | 2.289 ms | 0.125 ms | 0.15 | 0.00 | - | - | - | 67 KB | + | 'ImageSharp Webp Lossy' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 254.64 ms | 19.620 ms | 1.075 ms | 1.59 | 0.00 | 135000.0000 | - | - | 552,713 KB | + | 'Magick Webp Lossless' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 160.30 ms | 9.549 ms | 0.523 ms | 1.00 | 0.00 | - | - | - | 518 KB | + | 'ImageSharp Webp Lossless' | Job-OJJAMD | .NET Core 3.1 | Default | Png/Bike.png | 320.35 ms | 22.924 ms | 1.257 ms | 2.00 | 0.01 | 34000.0000 | 5000.0000 | 2000.0000 | 161,669 KB | + | | | | | | | | | | | | | | | + | 'Magick Webp Lossy' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 23.37 ms | 0.908 ms | 0.050 ms | 0.15 | 0.00 | - | - | - | 68 KB | + | 'ImageSharp Webp Lossy' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 378.67 ms | 25.540 ms | 1.400 ms | 2.36 | 0.01 | 135000.0000 | - | - | 554,351 KB | + | 'Magick Webp Lossless' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 160.13 ms | 5.115 ms | 0.280 ms | 1.00 | 0.00 | - | - | - | 520 KB | + | 'ImageSharp Webp Lossless' | Job-OMFOAS | .NET Framework 4.7.2 | Default | Png/Bike.png | 379.01 ms | 71.192 ms | 3.902 ms | 2.37 | 0.02 | 34000.0000 
| 5000.0000 | 2000.0000 | 162,119 KB | */ } } From b9e8f76990206843b485006bac8b9ff2cceb05ed Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 7 Nov 2021 18:07:43 +1100 Subject: [PATCH 19/36] Update FromPixel --- src/ImageSharp/Color/Color.Conversions.cs | 11 +++++++++++ src/ImageSharp/Color/Color.cs | 22 +++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs index 424b7dcdf..96aa05c96 100644 --- a/src/ImageSharp/Color/Color.Conversions.cs +++ b/src/ImageSharp/Color/Color.Conversions.cs @@ -23,6 +23,17 @@ namespace SixLabors.ImageSharp this.boxedHighPrecisionPixel = null; } + /// + /// Initializes a new instance of the struct. + /// + /// The containing the color information. + [MethodImpl(InliningOptions.ShortMethod)] + public Color(Rgb48 pixel) + { + this.data = new Rgba64(pixel.R, pixel.G, pixel.B, ushort.MaxValue); + this.boxedHighPrecisionPixel = null; + } + /// /// Initializes a new instance of the struct. 
/// diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index 61d6c8e6d..c461d034e 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -107,7 +107,27 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static Color FromPixel(TPixel pixel) where TPixel : unmanaged, IPixel - => new(pixel); + { + // Avoid boxing in case we can convert to Rgba64 safely and efficiently + if (typeof(TPixel) == typeof(Rgba64)) + { + return new((Rgba64)(object)pixel); + } + else if (typeof(TPixel) == typeof(Rgb48)) + { + return new((Rgb48)(object)pixel); + } + else if (Unsafe.SizeOf() <= Unsafe.SizeOf()) + { + Rgba32 p = default; + pixel.ToRgba32(ref p); + return new(p); + } + else + { + return new(pixel); + } + } /// /// Creates a new instance of the struct From 5b1720eb8deccd3ea37248111a68df73ce632c3a Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 13:27:08 +0100 Subject: [PATCH 20/36] Add sse41 version of Hadamard transform --- .../Formats/Webp/Lossy/LossyUtils.cs | 151 +++++++++++++++++- 1 file changed, 146 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 04ff80b2d..0993e2a66 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -4,11 +4,15 @@ using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal static class LossyUtils + internal static unsafe class LossyUtils { [MethodImpl(InliningOptions.ShortMethod)] public static int Vp8Sse16X16(Span a, Span b) => GetSse(a, b, 16, 16); @@ -61,11 +65,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public static int Vp8Disto16X16(Span a, Span b,
Span w) { int d = 0; + int dataSize = (4 * WebpConstants.Bps) - 16; for (int y = 0; y < 16 * WebpConstants.Bps; y += 4 * WebpConstants.Bps) { for (int x = 0; x < 16; x += 4) { - d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w); + d += Vp8Disto4X4(a.Slice(x + y, dataSize), b.Slice(x + y, dataSize), w); } } @@ -75,9 +80,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy [MethodImpl(InliningOptions.ShortMethod)] public static int Vp8Disto4X4(Span a, Span b, Span w) { - int sum1 = TTransform(a, w); - int sum2 = TTransform(b, w); - return Math.Abs(sum2 - sum1) >> 5; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse41.IsSupported) + { + int diffSum = TTransformSse41(a, b, w); + return Math.Abs(diffSum) >> 5; + } + else +#endif + { + int sum1 = TTransform(a, w); + int sum2 = TTransform(b, w); + return Math.Abs(sum2 - sum1) >> 5; + } } public static void DC16(Span dst, Span yuv, int offset) @@ -591,6 +606,132 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return sum; } +#if SUPPORTS_RUNTIME_INTRINSICS + /// + /// Hadamard transform + /// Returns the weighted sum of the absolute value of transformed coefficients. + /// w[] contains a row-major 4 by 4 symmetric matrix. + /// + public static int TTransformSse41(Span inputA, Span inputB, Span w) + { + Span sum = stackalloc int[4]; +#pragma warning disable SA1503 // Braces should not be omitted + fixed (byte* inputAPtr = inputA) + fixed (byte* inputBPtr = inputB) + fixed (ushort* wPtr = w) + fixed (int* outputPtr = sum) + { + // Load and combine inputs. 
+ Vector128 ina0 = Sse2.LoadVector128(inputAPtr); + Vector128 ina1 = Sse2.LoadVector128(inputAPtr + (WebpConstants.Bps * 1)); + Vector128 ina2 = Sse2.LoadVector128(inputAPtr + (WebpConstants.Bps * 2)); + Vector128 ina3 = Sse2.LoadVector128((long*)(inputAPtr + (WebpConstants.Bps * 3))); + Vector128 inb0 = Sse2.LoadVector128(inputBPtr); + Vector128 inb1 = Sse2.LoadVector128(inputBPtr + (WebpConstants.Bps * 1)); + Vector128 inb2 = Sse2.LoadVector128(inputBPtr + (WebpConstants.Bps * 2)); + Vector128 inb3 = Sse2.LoadVector128((long*)(inputBPtr + (WebpConstants.Bps * 3))); + + // Combine inA and inB (we'll do two transforms in parallel). + Vector128 inab0 = Sse2.UnpackLow(ina0.AsInt32(), inb0.AsInt32()); + Vector128 inab1 = Sse2.UnpackLow(ina1.AsInt32(), inb1.AsInt32()); + Vector128 inab2 = Sse2.UnpackLow(ina2.AsInt32(), inb2.AsInt32()); + Vector128 inab3 = Sse2.UnpackLow(ina3.AsInt32(), inb3.AsInt32()); + Vector128 tmp0 = Sse41.ConvertToVector128Int16(inab0.AsByte()); + Vector128 tmp1 = Sse41.ConvertToVector128Int16(inab1.AsByte()); + Vector128 tmp2 = Sse41.ConvertToVector128Int16(inab2.AsByte()); + Vector128 tmp3 = Sse41.ConvertToVector128Int16(inab3.AsByte()); + + // a00 a01 a02 a03 b00 b01 b02 b03 + // a10 a11 a12 a13 b10 b11 b12 b13 + // a20 a21 a22 a23 b20 b21 b22 b23 + // a30 a31 a32 a33 b30 b31 b32 b33 + // Vertical pass first to avoid a transpose (vertical and horizontal passes + // are commutative because w/kWeightY is symmetric) and subsequent transpose. + // Calculate a and b (two 4x4 at once). 
+ Vector128 a0 = Sse2.Add(tmp0, tmp2); + Vector128 a1 = Sse2.Add(tmp1, tmp3); + Vector128 a2 = Sse2.Subtract(tmp1, tmp3); + Vector128 a3 = Sse2.Subtract(tmp0, tmp2); + Vector128 b0 = Sse2.Add(a0, a1); + Vector128 b1 = Sse2.Add(a3, a2); + Vector128 b2 = Sse2.Subtract(a3, a2); + Vector128 b3 = Sse2.Subtract(a0, a1); + + // a00 a01 a02 a03 b00 b01 b02 b03 + // a10 a11 a12 a13 b10 b11 b12 b13 + // a20 a21 a22 a23 b20 b21 b22 b23 + // a30 a31 a32 a33 b30 b31 b32 b33 + // Transpose the two 4x4. + Vector128 transpose00 = Sse2.UnpackLow(b0, b1); + Vector128 transpose01 = Sse2.UnpackLow(b2, b3); + Vector128 transpose02 = Sse2.UnpackHigh(b0, b1); + Vector128 transpose03 = Sse2.UnpackHigh(b2, b3); + + // a00 a10 a01 a11 a02 a12 a03 a13 + // a20 a30 a21 a31 a22 a32 a23 a33 + // b00 b10 b01 b11 b02 b12 b03 b13 + // b20 b30 b21 b31 b22 b32 b23 b33 + Vector128 transpose10 = Sse2.UnpackLow(transpose00.AsInt32(), transpose01.AsInt32()); + Vector128 transpose11 = Sse2.UnpackLow(transpose02.AsInt32(), transpose03.AsInt32()); + Vector128 transpose12 = Sse2.UnpackHigh(transpose00.AsInt32(), transpose01.AsInt32()); + Vector128 transpose13 = Sse2.UnpackHigh(transpose02.AsInt32(), transpose03.AsInt32()); + + // a00 a10 a20 a30 a01 a11 a21 a31 + // b00 b10 b20 b30 b01 b11 b21 b31 + // a02 a12 a22 a32 a03 a13 a23 a33 + // b02 b12 b22 b32 b03 b13 b23 b33 + Vector128 output0 = Sse2.UnpackLow(transpose10.AsInt64(), transpose11.AsInt64()); + Vector128 output1 = Sse2.UnpackHigh(transpose10.AsInt64(), transpose11.AsInt64()); + Vector128 output2 = Sse2.UnpackLow(transpose12.AsInt64(), transpose13.AsInt64()); + Vector128 output3 = Sse2.UnpackHigh(transpose12.AsInt64(), transpose13.AsInt64()); + + // a00 a10 a20 a30 b00 b10 b20 b30 + // a01 a11 a21 a31 b01 b11 b21 b31 + // a02 a12 a22 a32 b02 b12 b22 b32 + // a03 a13 a23 a33 b03 b13 b23 b33 + // Horizontal pass and difference of weighted sums.
+ Vector128 w0 = Sse2.LoadVector128(wPtr); + Vector128 w8 = Sse2.LoadVector128(wPtr + 8); + + // Calculate a and b (two 4x4 at once). + a0 = Sse2.Add(output0.AsInt16(), output2.AsInt16()); + a1 = Sse2.Add(output1.AsInt16(), output3.AsInt16()); + a2 = Sse2.Subtract(output1.AsInt16(), output3.AsInt16()); + a3 = Sse2.Subtract(output0.AsInt16(), output2.AsInt16()); + b0 = Sse2.Add(a0, a1); + b1 = Sse2.Add(a3, a2); + b2 = Sse2.Subtract(a3, a2); + b3 = Sse2.Subtract(a0, a1); + + // Separate the transforms of inA and inB. + Vector128 ab0 = Sse2.UnpackLow(b0.AsInt64(), b1.AsInt64()); + Vector128 ab2 = Sse2.UnpackLow(b2.AsInt64(), b3.AsInt64()); + Vector128 bb0 = Sse2.UnpackHigh(b0.AsInt64(), b1.AsInt64()); + Vector128 bb2 = Sse2.UnpackHigh(b2.AsInt64(), b3.AsInt64()); + + Vector128 ab0Abs = Ssse3.Abs(ab0.AsInt16()); + Vector128 ab2Abs = Ssse3.Abs(ab2.AsInt16()); + Vector128 b0Abs = Ssse3.Abs(bb0.AsInt16()); + Vector128 bb2Abs = Ssse3.Abs(bb2.AsInt16()); + + // weighted sums. + Vector128 ab0mulw0 = Sse2.MultiplyAddAdjacent(ab0Abs.AsInt16(), w0.AsInt16()); + Vector128 ab2mulw8 = Sse2.MultiplyAddAdjacent(ab2Abs.AsInt16(), w8.AsInt16()); + Vector128 b0mulw0 = Sse2.MultiplyAddAdjacent(b0Abs.AsInt16(), w0.AsInt16()); + Vector128 bb2mulw8 = Sse2.MultiplyAddAdjacent(bb2Abs.AsInt16(), w8.AsInt16()); + Vector128 ab0ab2Sum = Sse2.Add(ab0mulw0, ab2mulw8); + Vector128 b0w0bb2w8Sum = Sse2.Add(b0mulw0, bb2mulw8); + + // difference of weighted sums. 
+ Vector128 result = Sse2.Subtract(ab0ab2Sum.AsInt32(), b0w0bb2w8Sum.AsInt32()); + Sse2.Store(outputPtr, result.AsInt32()); + } + + return sum[3] + sum[2] + sum[1] + sum[0]; +#pragma warning restore SA1503 // Braces should not be omitted + } +#endif + public static void TransformTwo(Span src, Span dst) { TransformOne(src, dst); From d2017933d7042d3757062cfe3134206652ce7b27 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 13:31:11 +0100 Subject: [PATCH 21/36] Add HadamardTransform sse tests --- .../Formats/WebP/LossyUtilsTests.cs | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs new file mode 100644 index 000000000..6a9a078d7 --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -0,0 +1,58 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using SixLabors.ImageSharp.Formats.Webp.Lossy; +using SixLabors.ImageSharp.Tests.TestUtilities; +using Xunit; + +namespace SixLabors.ImageSharp.Tests.Formats.WebP +{ + [Trait("Format", "Webp")] + public class LossyUtilsTests + { + private static void RunHadamardTransformTest() + { + byte[] a = + { + 27, 27, 28, 29, 29, 28, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, + 129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 29, 29, 28, + 28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 26, + 26, 26, 26, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128, + 128, 128, 128, 128, 128, 128, 128, 28, 27, 27, 26, 26, 27, 27, 28, 27, 28, 28, 29, 29, 28, 28, 27 + }; + + byte[] b = + { + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204, + 204, 204, 204, 204, 204, 204, 204, 204, 204, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204, + 204, 204, 204, 204, 204, 204, 204, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28 + }; + + ushort[] w = { 38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2 }; + int expected = 2; + + int actual = LossyUtils.Vp8Disto4X4(a, b, w); + Assert.Equal(expected, actual); + } + + [Fact] + public void HadamardTransform_Works() => RunHadamardTransformTest(); + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void HadamardTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.AllowAll); + + [Fact] + public void HadamardTransform_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE2); + + [Fact] + public 
void HadamardTransform_WithoutSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE41); + + [Fact] + public void HadamardTransform_WithoutSSE2AndSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE41 | HwIntrinsics.DisableSSE2); +#endif + + } +} From 3a03fad75eaa8464d1bd84cccd307014f9417497 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 14:51:51 +0100 Subject: [PATCH 22/36] Add sse41 version of quantize block --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 176 ++++++++++++++---- 1 file changed, 144 insertions(+), 32 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 2ed438166..02087ceda 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -3,13 +3,17 @@ using System; using System.Runtime.CompilerServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp.Formats.Webp.Lossy { /// /// Quantization methods. 
/// - internal static class QuantEnc + internal static unsafe class QuantEnc { private static readonly byte[] Zigzag = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; @@ -17,6 +21,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy private const int MaxLevel = 2047; +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector128 MaxCoeff2047 = Vector128.Create((short)MaxLevel); + + private static readonly Vector128 CstLo = Vector128.Create(0, 1, 2, 3, 8, 9, 254, 255, 10, 11, 4, 5, 6, 7, 12, 13); + + private static readonly Vector128 Cst7 = Vector128.Create(254, 255, 254, 255, 254, 255, 254, 255, 14, 15, 254, 255, 254, 255, 254, 255); + + private static readonly Vector128 CstHi = Vector128.Create(2, 3, 8, 9, 10, 11, 4, 5, 254, 255, 6, 7, 12, 13, 14, 15); + + private static readonly Vector128 Cst8 = Vector128.Create(254, 255, 254, 255, 254, 255, 0, 1, 254, 255, 254, 255, 254, 255, 254, 255); +#endif + // Diffusion weights. We under-correct a bit (15/16th of the error is actually // diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0. private const int C1 = 7; // fraction of error sent to the 4x4 block below @@ -486,51 +502,147 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy [MethodImpl(InliningOptions.ShortMethod)] public static int Quantize2Blocks(Span input, Span output, Vp8Matrix mtx) { - int nz = QuantizeBlock(input, output, mtx) << 0; - nz |= QuantizeBlock(input.Slice(1 * 16), output.Slice(1 * 16), mtx) << 1; + int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), mtx) << 0; + nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), mtx) << 1; return nz; } public static int QuantizeBlock(Span input, Span output, Vp8Matrix mtx) { - int last = -1; - int n; - for (n = 0; n < 16; ++n) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse41.IsSupported) { - int j = Zigzag[n]; - bool sign = input[j] < 0; - uint coeff = (uint)((sign ? 
-input[j] : input[j]) + mtx.Sharpen[j]); - if (coeff > mtx.ZThresh[j]) +#pragma warning disable SA1503 // Braces should not be omitted + fixed (ushort* mtxIqPtr = mtx.IQ) + fixed (ushort* mtxQPtr = mtx.Q) + fixed (uint* biasQPtr = mtx.Bias) + fixed (short* inputPtr = input) + fixed (short* outputPtr = output) { - uint q = mtx.Q[j]; - uint iQ = mtx.IQ[j]; - uint b = mtx.Bias[j]; - int level = QuantDiv(coeff, iQ, b); - if (level > MaxLevel) + // Load all inputs. + Vector128 input0 = Sse2.LoadVector128(inputPtr); + Vector128 input8 = Sse2.LoadVector128(inputPtr + 8); + Vector128 iq0 = Sse2.LoadVector128(mtxIqPtr); + Vector128 iq8 = Sse2.LoadVector128(mtxIqPtr + 8); + Vector128 q0 = Sse2.LoadVector128(mtxQPtr); + Vector128 q8 = Sse2.LoadVector128(mtxQPtr + 8); + + // coeff = abs(in) + Vector128 coeff0 = Ssse3.Abs(input0); + Vector128 coeff8 = Ssse3.Abs(input8); + + // out = (coeff * iQ + B) >> QFIX + // doing calculations with 32b precision (QFIX=17) + // out = (coeff * iQ) + Vector128 coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0); + Vector128 coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0); + Vector128 coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8); + Vector128 coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8); + Vector128 out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H); + Vector128 out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H); + Vector128 out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H); + Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); + + // out = (coeff * iQ + B) + Vector128 bias00 = Sse2.LoadVector128(biasQPtr); + Vector128 bias04 = Sse2.LoadVector128(biasQPtr + 4); + Vector128 bias08 = Sse2.LoadVector128(biasQPtr + 8); + Vector128 bias12 = Sse2.LoadVector128(biasQPtr + 12); + out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); + out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); + out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); + out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16(); + + // out = QUANTDIV(coeff, iQ, B, QFIX) + 
out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16(); + out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16(); + out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16(); + out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16(); + + // pack result as 16b + Vector128 out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32()); + Vector128 out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32()); + + // if (coeff > 2047) coeff = 2047 + out0 = Sse2.Min(out0, MaxCoeff2047); + out8 = Sse2.Min(out8, MaxCoeff2047); + + // put sign back + out0 = Ssse3.Sign(out0, input0); + out8 = Ssse3.Sign(out8, input8); + + // in = out * Q + input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); + input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); + + // in = out * Q + Sse2.Store(inputPtr, input0); + Sse2.Store(inputPtr + 8, input8); + + // zigzag the output before storing it. The re-ordering is: + // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 + // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15 + // There's only two misplaced entries ([8] and [7]) that are crossing the + // reg's boundaries. + // We use pshufb instead of pshuflo/pshufhi. + Vector128 tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo); + Vector128 tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7 + Vector128 tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi); + Vector128 tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8 + Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); + Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); + Sse2.Store(outputPtr, outZ0.AsInt16()); + Sse2.Store(outputPtr + 8, outZ8.AsInt16()); + Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); + + // Detect if all 'out' values are zeroes or not. + Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); + return Sse2.MoveMask(cmpeq) != 0xffff ? 
1 : 0; + } +#pragma warning restore SA1503 // Braces should not be omitted + } + else +#endif + { + int last = -1; + int n; + for (n = 0; n < 16; ++n) + { + int j = Zigzag[n]; + bool sign = input[j] < 0; + uint coeff = (uint)((sign ? -input[j] : input[j]) + mtx.Sharpen[j]); + if (coeff > mtx.ZThresh[j]) { - level = MaxLevel; - } + uint q = mtx.Q[j]; + uint iQ = mtx.IQ[j]; + uint b = mtx.Bias[j]; + int level = QuantDiv(coeff, iQ, b); + if (level > MaxLevel) + { + level = MaxLevel; + } - if (sign) - { - level = -level; - } + if (sign) + { + level = -level; + } - input[j] = (short)(level * (int)q); - output[n] = (short)level; - if (level != 0) + input[j] = (short)(level * (int)q); + output[n] = (short)level; + if (level != 0) + { + last = n; + } + } + else { - last = n; + output[n] = 0; + input[j] = 0; } } - else - { - output[n] = 0; - input[j] = 0; - } - } - return last >= 0 ? 1 : 0; + return last >= 0 ? 1 : 0; + } } // Quantize as usual, but also compute and return the quantization error. From 020134ad8c15e58621635d4ca4b5fb4c6acdbe89 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 14:52:11 +0100 Subject: [PATCH 23/36] Add QuantizeBlock sse tests --- .../Formats/Webp/Lossy/Vp8Matrix.cs | 9 +++ .../Formats/WebP/QuantEncTests.cs | 56 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs index 4276b887f..e525e388b 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs @@ -34,6 +34,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.Sharpen = new short[16]; } + public Vp8Matrix(ushort[] q, ushort[] iq, uint[] bias, uint[] zThresh, short[] sharpen) + { + this.Q = q; + this.IQ = iq; + this.Bias = bias; + this.ZThresh = zThresh; + this.Sharpen = sharpen; + } + /// /// Gets the quantizer steps. 
/// diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs new file mode 100644 index 000000000..280a7902a --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -0,0 +1,56 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System.Linq; +using SixLabors.ImageSharp.Formats.Webp.Lossy; +using SixLabors.ImageSharp.Tests.TestUtilities; +using Xunit; + +namespace SixLabors.ImageSharp.Tests.Formats.WebP +{ + [Trait("Format", "Webp")] + public class QuantEncTests + { + private static void RunQuantizeBlockTest() + { + // arrange + short[] input = { 378, 777, -851, 888, 259, 148, 0, -111, -185, -185, -74, -37, 148, 74, 111, 74 }; + short[] output = new short[16]; + ushort[] q = { 42, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37 }; + ushort[] iq = { 3120, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542 }; + uint[] bias = + { + 49152, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, + 55296, 55296 + }; + uint[] zthresh = { 26, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21 }; + short[] expectedOutput = { 9, 21, 7, -5, 4, -23, 24, 0, -5, 4, 2, -2, -3, -1, 3, 2 }; + int expectedResult = 1; + var vp8Matrix = new Vp8Matrix(q, iq, bias, zthresh, new short[16]); + + // act + int actualResult = QuantEnc.QuantizeBlock(input, output, vp8Matrix); + + // assert + Assert.True(output.SequenceEqual(expectedOutput)); + Assert.Equal(expectedResult, actualResult); + } + + [Fact] + public void QuantizeBlock_Works() => RunQuantizeBlockTest(); + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void QuantizeBlock_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.AllowAll); + + [Fact] + public void QuantizeBlock_WithoutSSE2_Works() => 
FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2); + + [Fact] + public void QuantizeBlock_WithoutSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSSE3); + + [Fact] + public void QuantizeBlock_WithoutSSE2AndSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2 | HwIntrinsics.DisableSSSE3); +#endif + } +} From a628909b8da58e9dbd10bfa3b70e9c8ce66ddc1d Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 15:02:08 +0100 Subject: [PATCH 24/36] Add coeff = abs(in) + sharpen --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 02087ceda..b812909b2 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -516,6 +516,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy fixed (ushort* mtxIqPtr = mtx.IQ) fixed (ushort* mtxQPtr = mtx.Q) fixed (uint* biasQPtr = mtx.Bias) + fixed (short* sharpenPtr = mtx.Sharpen) fixed (short* inputPtr = input) fixed (short* outputPtr = output) { @@ -531,6 +532,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vector128 coeff0 = Ssse3.Abs(input0); Vector128 coeff8 = Ssse3.Abs(input8); + // coeff = abs(in) + sharpen + Vector128 sharpen0 = Sse2.LoadVector128(sharpenPtr); + Vector128 sharpen8 = Sse2.LoadVector128(sharpenPtr + 8); + Sse2.Add(coeff0.AsInt16(), sharpen0); + Sse2.Add(coeff8.AsInt16(), sharpen8); + // out = (coeff * iQ + B) >> QFIX // doing calculations with 32b precision (QFIX=17) // out = (coeff * iQ) From af90336173a1ee20a6c894c113e5f799b139bf9f Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sun, 7 Nov 2021 15:25:47 +0100 Subject: [PATCH 25/36] stackalloc header buffer in InternalDetectFormat --- src/ImageSharp/Image.Decode.cs | 51 
+++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/src/ImageSharp/Image.Decode.cs b/src/ImageSharp/Image.Decode.cs index 94da2c995..ee340bf86 100644 --- a/src/ImageSharp/Image.Decode.cs +++ b/src/ImageSharp/Image.Decode.cs @@ -58,31 +58,42 @@ namespace SixLabors.ImageSharp return null; } - using (IMemoryOwner buffer = config.MemoryAllocator.Allocate(headerSize, AllocationOptions.Clean)) + // Header sizes are so small, that headersBuffer will be always stackalloc-ed in practice, + // and heap allocation will never happen, there is no need for the usual try-finally ArrayPool dance. + // The array case is only a safety mechanism following stackalloc best practices. + Span headersBuffer = headerSize > 512 ? new byte[headerSize] : stackalloc byte[headerSize]; + long startPosition = stream.Position; + + // Read doesn't always guarantee the full returned length so read a byte + // at a time until we get either our count or hit the end of the stream. + int n = 0; + int i; + do { - Span bufferSpan = buffer.GetSpan(); - long startPosition = stream.Position; + i = stream.Read(headersBuffer, n, headerSize - n); + n += i; + } + while (n < headerSize && i > 0); - // Read doesn't always guarantee the full returned length so read a byte - // at a time until we get either our count or hit the end of the stream. - int n = 0; - int i; - do + stream.Position = startPosition; + + // Does the given stream contain enough data to fit in the header for the format + // and does that data match the format specification? + // Individual formats should still check since they are public. 
+ IImageFormat format = null; + foreach (IImageFormatDetector formatDetector in config.ImageFormatsManager.FormatDetectors) + { + if (formatDetector.HeaderSize <= headerSize) { - i = stream.Read(bufferSpan, n, headerSize - n); - n += i; + IImageFormat attemptFormat = formatDetector.DetectFormat(headersBuffer); + if (attemptFormat != null) + { + format = attemptFormat; + } } - while (n < headerSize && i > 0); - - stream.Position = startPosition; - - // Does the given stream contain enough data to fit in the header for the format - // and does that data match the format specification? - // Individual formats should still check since they are public. - return config.ImageFormatsManager.FormatDetectors - .Where(x => x.HeaderSize <= headerSize) - .Select(x => x.DetectFormat(buffer.GetSpan())).LastOrDefault(x => x != null); } + + return format; } /// From 90bab3939770a028a45e3d824dc6949fa124c492 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 8 Nov 2021 16:56:38 +1100 Subject: [PATCH 26/36] Special case La32 and L16 --- src/ImageSharp/Color/Color.Conversions.cs | 22 ++++++++++++++++++++++ src/ImageSharp/Color/Color.cs | 8 ++++++++ 2 files changed, 30 insertions(+) diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs index 96aa05c96..bf7869e53 100644 --- a/src/ImageSharp/Color/Color.Conversions.cs +++ b/src/ImageSharp/Color/Color.Conversions.cs @@ -34,6 +34,28 @@ namespace SixLabors.ImageSharp this.boxedHighPrecisionPixel = null; } + /// + /// Initializes a new instance of the struct. + /// + /// The containing the color information. + [MethodImpl(InliningOptions.ShortMethod)] + public Color(La32 pixel) + { + this.data = new Rgba64(pixel.L, pixel.L, pixel.L, pixel.A); + this.boxedHighPrecisionPixel = null; + } + + /// + /// Initializes a new instance of the struct. + /// + /// The containing the color information. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public Color(L16 pixel) + { + this.data = new Rgba64(pixel.PackedValue, pixel.PackedValue, pixel.PackedValue, ushort.MaxValue); + this.boxedHighPrecisionPixel = null; + } + /// /// Initializes a new instance of the struct. /// diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index c461d034e..7c21d62dd 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -117,6 +117,14 @@ namespace SixLabors.ImageSharp { return new((Rgb48)(object)pixel); } + else if (typeof(TPixel) == typeof(La32)) + { + return new((La32)(object)pixel); + } + else if (typeof(TPixel) == typeof(L16)) + { + return new((L16)(object)pixel); + } else if (Unsafe.SizeOf() <= Unsafe.SizeOf()) { Rgba32 p = default; From 5c6e08b80c39f3cd4e24774ee66b5b011c41aa00 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 8 Nov 2021 16:02:06 +0100 Subject: [PATCH 27/36] Avoid pinning of vp8 matrix data --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 169 +++++++++--------- 1 file changed, 85 insertions(+), 84 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index f935bd3ee..b300b7b5c 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -3,6 +3,7 @@ using System; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -537,99 +538,99 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy if (Sse41.IsSupported) { #pragma warning disable SA1503 // Braces should not be omitted - fixed (ushort* mtxIqPtr = mtx.IQ) - fixed (ushort* mtxQPtr = mtx.Q) - fixed (uint* biasQPtr = mtx.Bias) - fixed (short* sharpenPtr = mtx.Sharpen) + // Load all inputs. 
+ Vector128 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); + Vector128 input8 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(8, 8))); + Vector128 iq0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(0, 8))); + Vector128 iq8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(8, 8))); + Vector128 q0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(0, 8))); + Vector128 q8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(8, 8))); + + // coeff = abs(in) + Vector128 coeff0 = Ssse3.Abs(input0); + Vector128 coeff8 = Ssse3.Abs(input8); + + // coeff = abs(in) + sharpen + Vector128 sharpen0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(0, 8))); + Vector128 sharpen8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(8, 8))); + Sse2.Add(coeff0.AsInt16(), sharpen0); + Sse2.Add(coeff8.AsInt16(), sharpen8); + + // out = (coeff * iQ + B) >> QFIX + // doing calculations with 32b precision (QFIX=17) + // out = (coeff * iQ) + Vector128 coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0); + Vector128 coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0); + Vector128 coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8); + Vector128 coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8); + Vector128 out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H); + Vector128 out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H); + Vector128 out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H); + Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); + + // out = (coeff * iQ + B) + Vector128 bias00 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(0, 4))); + Vector128 bias04 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(4, 4))); + Vector128 bias08 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(8, 4))); + Vector128 bias12 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(12, 4))); + out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); + out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); + out08 = 
Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); + out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16(); + + // out = QUANTDIV(coeff, iQ, B, QFIX) + out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16(); + out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16(); + out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16(); + out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16(); + + // pack result as 16b + Vector128 out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32()); + Vector128 out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32()); + + // if (coeff > 2047) coeff = 2047 + out0 = Sse2.Min(out0, MaxCoeff2047); + out8 = Sse2.Min(out8, MaxCoeff2047); + + // put sign back + out0 = Ssse3.Sign(out0, input0); + out8 = Ssse3.Sign(out8, input8); + + // in = out * Q + input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); + input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); + fixed (short* inputPtr = input) - fixed (short* outputPtr = output) { - // Load all inputs. 
- Vector128 input0 = Sse2.LoadVector128(inputPtr); - Vector128 input8 = Sse2.LoadVector128(inputPtr + 8); - Vector128 iq0 = Sse2.LoadVector128(mtxIqPtr); - Vector128 iq8 = Sse2.LoadVector128(mtxIqPtr + 8); - Vector128 q0 = Sse2.LoadVector128(mtxQPtr); - Vector128 q8 = Sse2.LoadVector128(mtxQPtr + 8); - - // coeff = abs(in) - Vector128 coeff0 = Ssse3.Abs(input0); - Vector128 coeff8 = Ssse3.Abs(input8); - - // coeff = abs(in) + sharpen - Vector128 sharpen0 = Sse2.LoadVector128(sharpenPtr); - Vector128 sharpen8 = Sse2.LoadVector128(sharpenPtr + 8); - Sse2.Add(coeff0.AsInt16(), sharpen0); - Sse2.Add(coeff8.AsInt16(), sharpen8); - - // out = (coeff * iQ + B) >> QFIX - // doing calculations with 32b precision (QFIX=17) - // out = (coeff * iQ) - Vector128 coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0); - Vector128 coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0); - Vector128 coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8); - Vector128 coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8); - Vector128 out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H); - Vector128 out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H); - Vector128 out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H); - Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); - - // out = (coeff * iQ + B) - Vector128 bias00 = Sse2.LoadVector128(biasQPtr); - Vector128 bias04 = Sse2.LoadVector128(biasQPtr + 4); - Vector128 bias08 = Sse2.LoadVector128(biasQPtr + 8); - Vector128 bias12 = Sse2.LoadVector128(biasQPtr + 12); - out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); - out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); - out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); - out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16(); - - // out = QUANTDIV(coeff, iQ, B, QFIX) - out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16(); - out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16(); - out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), 
WebpConstants.QFix).AsUInt16(); - out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16(); - - // pack result as 16b - Vector128 out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32()); - Vector128 out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32()); - - // if (coeff > 2047) coeff = 2047 - out0 = Sse2.Min(out0, MaxCoeff2047); - out8 = Sse2.Min(out8, MaxCoeff2047); - - // put sign back - out0 = Ssse3.Sign(out0, input0); - out8 = Ssse3.Sign(out8, input8); - - // in = out * Q - input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); - input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); - // in = out * Q Sse2.Store(inputPtr, input0); Sse2.Store(inputPtr + 8, input8); + } - // zigzag the output before storing it. The re-ordering is: - // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 - // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15 - // There's only two misplaced entries ([8] and [7]) that are crossing the - // reg's boundaries. - // We use pshufb instead of pshuflo/pshufhi. - Vector128 tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo); - Vector128 tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7 - Vector128 tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi); - Vector128 tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8 - Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); - Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); + // zigzag the output before storing it. The re-ordering is: + // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 + // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15 + // There's only two misplaced entries ([8] and [7]) that are crossing the + // reg's boundaries. + // We use pshufb instead of pshuflo/pshufhi. 
+ Vector128 tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo); + Vector128 tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7 + Vector128 tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi); + Vector128 tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8 + Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); + Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); + + fixed (short* outputPtr = output) + { Sse2.Store(outputPtr, outZ0.AsInt16()); Sse2.Store(outputPtr + 8, outZ8.AsInt16()); - Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); - - // Detect if all 'out' values are zeroes or not. - Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); - return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; } + + Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); + + // Detect if all 'out' values are zeroes or not. + Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); + return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; #pragma warning restore SA1503 // Braces should not be omitted } else From 0c0812de82648be40a35dc63a9b6c914bdcbbbf7 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 8 Nov 2021 16:58:40 +0100 Subject: [PATCH 28/36] Avoid pinning input and output data --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index b300b7b5c..6e25dc003 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -600,12 +600,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); - fixed (short* inputPtr = input) - { - // in = out * Q - Sse2.Store(inputPtr, input0); - Sse2.Store(inputPtr + 8, input8); - } + // in = out * Q + ref short inputRef = ref MemoryMarshal.GetReference(input); + Unsafe.As>(ref 
inputRef) = input0; + Unsafe.As>(ref Unsafe.Add(ref inputRef, 8)) = input8; // zigzag the output before storing it. The re-ordering is: // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 @@ -620,11 +618,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); - fixed (short* outputPtr = output) - { - Sse2.Store(outputPtr, outZ0.AsInt16()); - Sse2.Store(outputPtr + 8, outZ8.AsInt16()); - } + ref short outputRef = ref MemoryMarshal.GetReference(output); + Unsafe.As>(ref outputRef) = outZ0.AsInt16(); + Unsafe.As>(ref Unsafe.Add(ref outputRef, 8)) = outZ8.AsInt16(); Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); From cffa4b0c366a3d80b7e5c315127ae0a27f1ddb8d Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 8 Nov 2021 17:00:18 +0100 Subject: [PATCH 29/36] Only test with and without HardwareIntrinsics --- tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs index 280a7902a..d0cdfc1de 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -44,13 +44,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP public void QuantizeBlock_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.AllowAll); [Fact] - public void QuantizeBlock_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2); - - [Fact] - public void QuantizeBlock_WithoutSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSSE3); - - [Fact] - public void QuantizeBlock_WithoutSSE2AndSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2 
| HwIntrinsics.DisableSSSE3); + public void QuantizeBlock_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableHWIntrinsic); #endif } } From c9fc5cdb56a21deaf78ae4eb73a6e8270c951841 Mon Sep 17 00:00:00 2001 From: Berkan Diler Date: Mon, 8 Nov 2021 18:33:24 +0100 Subject: [PATCH 30/36] Collapse AsSpan().Slice(..) calls into AsSpan(..) --- src/ImageSharp/Formats/Png/PngDecoderCore.cs | 2 +- src/ImageSharp/Formats/Webp/WebpDecoderCore.cs | 2 +- src/ImageSharp/IO/ChunkedMemoryStream.cs | 4 ++-- .../Processors/Transforms/Resize/ResizeKernelMap.cs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Png/PngDecoderCore.cs b/src/ImageSharp/Formats/Png/PngDecoderCore.cs index 987dc150c..cf3cd7eb1 100644 --- a/src/ImageSharp/Formats/Png/PngDecoderCore.cs +++ b/src/ImageSharp/Formats/Png/PngDecoderCore.cs @@ -1071,7 +1071,7 @@ namespace SixLabors.ImageSharp.Formats.Png int bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length); while (bytesRead != 0) { - uncompressedBytes.AddRange(this.buffer.AsSpan().Slice(0, bytesRead).ToArray()); + uncompressedBytes.AddRange(this.buffer.AsSpan(0, bytesRead).ToArray()); bytesRead = inflateStream.CompressedStream.Read(this.buffer, 0, this.buffer.Length); } diff --git a/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs b/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs index 44a55a4c6..09071406c 100644 --- a/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs +++ b/src/ImageSharp/Formats/Webp/WebpDecoderCore.cs @@ -306,7 +306,7 @@ namespace SixLabors.ImageSharp.Formats.Webp // Check for VP8 magic bytes. 
this.currentStream.Read(this.buffer, 0, 3); - if (!this.buffer.AsSpan().Slice(0, 3).SequenceEqual(WebpConstants.Vp8HeaderMagicBytes)) + if (!this.buffer.AsSpan(0, 3).SequenceEqual(WebpConstants.Vp8HeaderMagicBytes)) { WebpThrowHelper.ThrowImageFormatException("VP8 magic bytes not found"); } diff --git a/src/ImageSharp/IO/ChunkedMemoryStream.cs b/src/ImageSharp/IO/ChunkedMemoryStream.cs index b9220c56a..e28baf879 100644 --- a/src/ImageSharp/IO/ChunkedMemoryStream.cs +++ b/src/ImageSharp/IO/ChunkedMemoryStream.cs @@ -243,7 +243,7 @@ namespace SixLabors.ImageSharp.IO const string bufferMessage = "Offset subtracted from the buffer length is less than count."; Guard.IsFalse(buffer.Length - offset < count, nameof(buffer), bufferMessage); - return this.ReadImpl(buffer.AsSpan().Slice(offset, count)); + return this.ReadImpl(buffer.AsSpan(offset, count)); } #if SUPPORTS_SPAN_STREAM @@ -359,7 +359,7 @@ namespace SixLabors.ImageSharp.IO const string bufferMessage = "Offset subtracted from the buffer length is less than count."; Guard.IsFalse(buffer.Length - offset < count, nameof(buffer), bufferMessage); - this.WriteImpl(buffer.AsSpan().Slice(offset, count)); + this.WriteImpl(buffer.AsSpan(offset, count)); } #if SUPPORTS_SPAN_STREAM diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs index a58c20f68..9cc468060 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs @@ -216,7 +216,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Transforms ResizeKernel kernel = this.CreateKernel(dataRowIndex, left, right); - Span kernelValues = this.tempValues.AsSpan().Slice(0, kernel.Length); + Span kernelValues = this.tempValues.AsSpan(0, kernel.Length); double sum = 0; for (int j = left; j <= right; j++) From 670e2eeafc14b7c16757f1b909eb552a9e61b1ca Mon Sep 17 
00:00:00 2001 From: James Jackson-South Date: Tue, 9 Nov 2021 11:43:19 +1100 Subject: [PATCH 31/36] Update ColorTests.CastTo.cs --- .../ImageSharp.Tests/Color/ColorTests.CastTo.cs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs index af35d1f89..3003265ca 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs @@ -90,16 +90,25 @@ namespace SixLabors.ImageSharp.Tests } [Fact] - public void TPixel() + public void GenericPixel() { - var source = new RgbaVector(float.Epsilon, 2 * float.Epsilon, float.MaxValue, float.MinValue); + AssertGenericPixel(new RgbaVector(float.Epsilon, 2 * float.Epsilon, float.MaxValue, float.MinValue)); + AssertGenericPixel(new Rgba64(1, 2, ushort.MaxValue, ushort.MaxValue - 1)); + AssertGenericPixel(new Rgb48(1, 2, ushort.MaxValue - 1)); + AssertGenericPixel(new La32(1, ushort.MaxValue - 1)); + AssertGenericPixel(new L16(ushort.MaxValue - 1)); + AssertGenericPixel(new Rgba32(1, 2, 255, 254)); + } + private static void AssertGenericPixel(TPixel source) + where TPixel : unmanaged, IPixel + { // Act: var color = Color.FromPixel(source); // Assert: - RgbaVector data = color.ToPixel(); - Assert.Equal(source, data); + TPixel actual = color.ToPixel(); + Assert.Equal(source, actual); } } } From cb513a905c52e843440f14c70e40fe9192737e91 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 11:05:18 +0100 Subject: [PATCH 32/36] Use fixed sized arrays in Vp8Matrix --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 20 ++++---- .../Formats/Webp/Lossy/Vp8Encoder.cs | 8 +--- .../Formats/Webp/Lossy/Vp8Matrix.cs | 47 +++++-------------- .../Formats/Webp/Lossy/Vp8SegmentInfo.cs | 12 ++--- .../Formats/WebP/QuantEncTests.cs | 17 ++++--- 5 files changed, 41 insertions(+), 63 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs 
b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 6e25dc003..4c3a2ff5e 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -541,18 +541,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Load all inputs. Vector128 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); Vector128 input8 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(8, 8))); - Vector128 iq0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(0, 8))); - Vector128 iq8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(8, 8))); - Vector128 q0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(0, 8))); - Vector128 q8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(8, 8))); + Vector128 iq0 = Unsafe.As>(ref mtx.IQ[0]); + Vector128 iq8 = Unsafe.As>(ref mtx.IQ[8]); + Vector128 q0 = Unsafe.As>(ref mtx.Q[0]); + Vector128 q8 = Unsafe.As>(ref mtx.Q[8]); // coeff = abs(in) Vector128 coeff0 = Ssse3.Abs(input0); Vector128 coeff8 = Ssse3.Abs(input8); // coeff = abs(in) + sharpen - Vector128 sharpen0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(0, 8))); - Vector128 sharpen8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(8, 8))); + Vector128 sharpen0 = Unsafe.As>(ref mtx.Sharpen[0]); + Vector128 sharpen8 = Unsafe.As>(ref mtx.Sharpen[8]); Sse2.Add(coeff0.AsInt16(), sharpen0); Sse2.Add(coeff8.AsInt16(), sharpen8); @@ -569,10 +569,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); // out = (coeff * iQ + B) - Vector128 bias00 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(0, 4))); - Vector128 bias04 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(4, 4))); - Vector128 bias08 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(8, 4))); - Vector128 bias12 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(12, 4))); + Vector128 bias00 = Unsafe.As>(ref mtx.Bias[0]); + Vector128 
bias04 = Unsafe.As>(ref mtx.Bias[4]); + Vector128 bias08 = Unsafe.As>(ref mtx.Bias[8]); + Vector128 bias12 = Unsafe.As>(ref mtx.Bias[12]); out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 728574682..8a4115d21 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -502,7 +502,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.ResetStats(); } - private void AdjustFilterStrength() + private unsafe void AdjustFilterStrength() { if (this.filterStrength > 0) { @@ -806,7 +806,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy proba.NbSkip = 0; } - private void SetupMatrices(Vp8SegmentInfo[] dqm) + private unsafe void SetupMatrices(Vp8SegmentInfo[] dqm) { int tlambdaScale = this.method >= WebpEncodingMethod.Default ? 
this.spatialNoiseShaping : 0; for (int i = 0; i < dqm.Length; i++) @@ -814,10 +814,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vp8SegmentInfo m = dqm[i]; int q = m.Quant; - m.Y1 = new Vp8Matrix(); - m.Y2 = new Vp8Matrix(); - m.Uv = new Vp8Matrix(); - m.Y1.Q[0] = WebpLookupTables.DcTable[Numerics.Clamp(q + this.DqY1Dc, 0, 127)]; m.Y1.Q[1] = WebpLookupTables.AcTable[Numerics.Clamp(q, 0, 127)]; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs index e525e388b..66c91e44a 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs @@ -3,7 +3,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal class Vp8Matrix + internal unsafe struct Vp8Matrix { private static readonly int[][] BiasMatrices = { @@ -23,50 +23,29 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy private const int SharpenBits = 11; /// - /// Initializes a new instance of the class. + /// The quantizer steps. /// - public Vp8Matrix() - { - this.Q = new ushort[16]; - this.IQ = new ushort[16]; - this.Bias = new uint[16]; - this.ZThresh = new uint[16]; - this.Sharpen = new short[16]; - } - - public Vp8Matrix(ushort[] q, ushort[] iq, uint[] bias, uint[] zThresh, short[] sharpen) - { - this.Q = q; - this.IQ = iq; - this.Bias = bias; - this.ZThresh = zThresh; - this.Sharpen = sharpen; - } - - /// - /// Gets the quantizer steps. - /// - public ushort[] Q { get; } + public fixed ushort Q[16]; /// - /// Gets the reciprocals, fixed point. + /// The reciprocals, fixed point. /// - public ushort[] IQ { get; } + public fixed ushort IQ[16]; /// - /// Gets the rounding bias. + /// The rounding bias. /// - public uint[] Bias { get; } + public fixed uint Bias[16]; /// - /// Gets the value below which a coefficient is zeroed. + /// The value below which a coefficient is zeroed. 
/// - public uint[] ZThresh { get; } + public fixed uint ZThresh[16]; /// - /// Gets the frequency boosters for slight sharpening. + /// The frequency boosters for slight sharpening. /// - public short[] Sharpen { get; } + public fixed short Sharpen[16]; /// /// Returns the average quantizer. @@ -81,7 +60,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int isAcCoeff = i > 0 ? 1 : 0; int bias = BiasMatrices[type][isAcCoeff]; this.IQ[i] = (ushort)((1 << WebpConstants.QFix) / this.Q[i]); - this.Bias[i] = (uint)this.BIAS(bias); + this.Bias[i] = (uint)BIAS(bias); // zthresh is the exact value such that QUANTDIV(coeff, iQ, B) is: // * zero if coeff <= zthresh @@ -115,6 +94,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return (sum + 8) >> 4; } - private int BIAS(int b) => b << (WebpConstants.QFix - 8); + private static int BIAS(int b) => b << (WebpConstants.QFix - 8); } } diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs index cf2a5c177..71983055c 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs @@ -8,19 +8,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy internal class Vp8SegmentInfo { /// - /// Gets or sets the quantization matrix y1. + /// Gets the quantization matrix y1. /// - public Vp8Matrix Y1 { get; set; } + public Vp8Matrix Y1; /// - /// Gets or sets the quantization matrix y2. + /// Gets the quantization matrix y2. /// - public Vp8Matrix Y2 { get; set; } + public Vp8Matrix Y2; /// - /// Gets or sets the quantization matrix uv. + /// Gets the quantization matrix uv. /// - public Vp8Matrix Uv { get; set; } + public Vp8Matrix Uv; /// /// Gets or sets the quant-susceptibility, range [-127,127]. Zero is neutral. Lower values indicate a lower risk of blurriness. 
diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs index d0cdfc1de..7465c42ce 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -11,22 +11,25 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP [Trait("Format", "Webp")] public class QuantEncTests { - private static void RunQuantizeBlockTest() + private static unsafe void RunQuantizeBlockTest() { // arrange short[] input = { 378, 777, -851, 888, 259, 148, 0, -111, -185, -185, -74, -37, 148, 74, 111, 74 }; short[] output = new short[16]; ushort[] q = { 42, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37 }; ushort[] iq = { 3120, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542 }; - uint[] bias = - { - 49152, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, - 55296, 55296 - }; + uint[] bias = { 49152, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296 }; uint[] zthresh = { 26, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21 }; short[] expectedOutput = { 9, 21, 7, -5, 4, -23, 24, 0, -5, 4, 2, -2, -3, -1, 3, 2 }; int expectedResult = 1; - var vp8Matrix = new Vp8Matrix(q, iq, bias, zthresh, new short[16]); + Vp8Matrix vp8Matrix = default; + for (int i = 0; i < 16; i++) + { + vp8Matrix.Q[i] = q[i]; + vp8Matrix.IQ[i] = iq[i]; + vp8Matrix.Bias[i] = bias[i]; + vp8Matrix.ZThresh[i] = zthresh[i]; + } // act int actualResult = QuantEnc.QuantizeBlock(input, output, vp8Matrix); From 6e135cbd79f391f56ee69df0da2b8be505631491 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 12:38:41 +0100 Subject: [PATCH 33/36] Avoid pinning --- .../Formats/Webp/Lossy/LossyUtils.cs | 219 +++++++++--------- 1 file changed, 107 insertions(+), 112 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs 
b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index b8f232a43..ee224e0b0 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -4,6 +4,7 @@ using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -614,120 +615,114 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { Span sum = scratch.Slice(0, 4); sum.Clear(); -#pragma warning disable SA1503 // Braces should not be omitted - fixed (byte* inputAPtr = inputA) - fixed (byte* inputBPtr = inputB) - fixed (ushort* wPtr = w) - fixed (int* outputPtr = sum) - { - // Load and combine inputs. - Vector128 ina0 = Sse2.LoadVector128(inputAPtr); - Vector128 ina1 = Sse2.LoadVector128(inputAPtr + (WebpConstants.Bps * 1)); - Vector128 ina2 = Sse2.LoadVector128(inputAPtr + (WebpConstants.Bps * 2)); - Vector128 ina3 = Sse2.LoadVector128((long*)(inputAPtr + (WebpConstants.Bps * 3))); - Vector128 inb0 = Sse2.LoadVector128(inputBPtr); - Vector128 inb1 = Sse2.LoadVector128(inputBPtr + (WebpConstants.Bps * 1)); - Vector128 inb2 = Sse2.LoadVector128(inputBPtr + (WebpConstants.Bps * 2)); - Vector128 inb3 = Sse2.LoadVector128((long*)(inputBPtr + (WebpConstants.Bps * 3))); - - // Combine inA and inB (we'll do two transforms in parallel). 
- Vector128 inab0 = Sse2.UnpackLow(ina0.AsInt32(), inb0.AsInt32()); - Vector128 inab1 = Sse2.UnpackLow(ina1.AsInt32(), inb1.AsInt32()); - Vector128 inab2 = Sse2.UnpackLow(ina2.AsInt32(), inb2.AsInt32()); - Vector128 inab3 = Sse2.UnpackLow(ina3.AsInt32(), inb3.AsInt32()); - Vector128 tmp0 = Sse41.ConvertToVector128Int16(inab0.AsByte()); - Vector128 tmp1 = Sse41.ConvertToVector128Int16(inab1.AsByte()); - Vector128 tmp2 = Sse41.ConvertToVector128Int16(inab2.AsByte()); - Vector128 tmp3 = Sse41.ConvertToVector128Int16(inab3.AsByte()); - - // a00 a01 a02 a03 b00 b01 b02 b03 - // a10 a11 a12 a13 b10 b11 b12 b13 - // a20 a21 a22 a23 b20 b21 b22 b23 - // a30 a31 a32 a33 b30 b31 b32 b33 - // Vertical pass first to avoid a transpose (vertical and horizontal passes - // are commutative because w/kWeightY is symmetric) and subsequent transpose. - // Calculate a and b (two 4x4 at once). - Vector128 a0 = Sse2.Add(tmp0, tmp2); - Vector128 a1 = Sse2.Add(tmp1, tmp3); - Vector128 a2 = Sse2.Subtract(tmp1, tmp3); - Vector128 a3 = Sse2.Subtract(tmp0, tmp2); - Vector128 b0 = Sse2.Add(a0, a1); - Vector128 b1 = Sse2.Add(a3, a2); - Vector128 b2 = Sse2.Subtract(a3, a2); - Vector128 b3 = Sse2.Subtract(a0, a1); - - // a00 a01 a02 a03 b00 b01 b02 b03 - // a10 a11 a12 a13 b10 b11 b12 b13 - // a20 a21 a22 a23 b20 b21 b22 b23 - // a30 a31 a32 a33 b30 b31 b32 b33 - // Transpose the two 4x4. 
- Vector128 transpose00 = Sse2.UnpackLow(b0, b1); - Vector128 transpose01 = Sse2.UnpackLow(b2, b3); - Vector128 transpose02 = Sse2.UnpackHigh(b0, b1); - Vector128 transpose03 = Sse2.UnpackHigh(b2, b3); - - // a00 a10 a01 a11 a02 a12 a03 a13 - // a20 a30 a21 a31 a22 a32 a23 a33 - // b00 b10 b01 b11 b02 b12 b03 b13 - // b20 b30 b21 b31 b22 b32 b23 b33 - Vector128 transpose10 = Sse2.UnpackLow(transpose00.AsInt32(), transpose01.AsInt32()); - Vector128 transpose11 = Sse2.UnpackLow(transpose02.AsInt32(), transpose03.AsInt32()); - Vector128 transpose12 = Sse2.UnpackHigh(transpose00.AsInt32(), transpose01.AsInt32()); - Vector128 transpose13 = Sse2.UnpackHigh(transpose02.AsInt32(), transpose03.AsInt32()); - - // a00 a10 a20 a30 a01 a11 a21 a31 - // b00 b10 b20 b30 b01 b11 b21 b31 - // a02 a12 a22 a32 a03 a13 a23 a33 - // b02 b12 a22 b32 b03 b13 b23 b33 - Vector128 output0 = Sse2.UnpackLow(transpose10.AsInt64(), transpose11.AsInt64()); - Vector128 output1 = Sse2.UnpackHigh(transpose10.AsInt64(), transpose11.AsInt64()); - Vector128 output2 = Sse2.UnpackLow(transpose12.AsInt64(), transpose13.AsInt64()); - Vector128 output3 = Sse2.UnpackHigh(transpose12.AsInt64(), transpose13.AsInt64()); - - // a00 a10 a20 a30 b00 b10 b20 b30 - // a01 a11 a21 a31 b01 b11 b21 b31 - // a02 a12 a22 a32 b02 b12 b22 b32 - // a03 a13 a23 a33 b03 b13 b23 b33 - // Horizontal pass and difference of weighted sums. - Vector128 w0 = Sse2.LoadVector128(wPtr); - Vector128 w8 = Sse2.LoadVector128(wPtr + 8); - - // Calculate a and b (two 4x4 at once). - a0 = Sse2.Add(output0.AsInt16(), output2.AsInt16()); - a1 = Sse2.Add(output1.AsInt16(), output3.AsInt16()); - a2 = Sse2.Subtract(output1.AsInt16(), output3.AsInt16()); - a3 = Sse2.Subtract(output0.AsInt16(), output2.AsInt16()); - b0 = Sse2.Add(a0, a1); - b1 = Sse2.Add(a3, a2); - b2 = Sse2.Subtract(a3, a2); - b3 = Sse2.Subtract(a0, a1); - - // Separate the transforms of inA and inB. 
- Vector128 ab0 = Sse2.UnpackLow(b0.AsInt64(), b1.AsInt64()); - Vector128 ab2 = Sse2.UnpackLow(b2.AsInt64(), b3.AsInt64()); - Vector128 bb0 = Sse2.UnpackHigh(b0.AsInt64(), b1.AsInt64()); - Vector128 bb2 = Sse2.UnpackHigh(b2.AsInt64(), b3.AsInt64()); - - Vector128 ab0Abs = Ssse3.Abs(ab0.AsInt16()); - Vector128 ab2Abs = Ssse3.Abs(ab2.AsInt16()); - Vector128 b0Abs = Ssse3.Abs(bb0.AsInt16()); - Vector128 bb2Abs = Ssse3.Abs(bb2.AsInt16()); - - // weighted sums. - Vector128 ab0mulw0 = Sse2.MultiplyAddAdjacent(ab0Abs.AsInt16(), w0.AsInt16()); - Vector128 ab2mulw8 = Sse2.MultiplyAddAdjacent(ab2Abs.AsInt16(), w8.AsInt16()); - Vector128 b0mulw0 = Sse2.MultiplyAddAdjacent(b0Abs.AsInt16(), w0.AsInt16()); - Vector128 bb2mulw8 = Sse2.MultiplyAddAdjacent(bb2Abs.AsInt16(), w8.AsInt16()); - Vector128 ab0ab2Sum = Sse2.Add(ab0mulw0, ab2mulw8); - Vector128 b0w0bb2w8Sum = Sse2.Add(b0mulw0, bb2mulw8); - - // difference of weighted sums. - Vector128 result = Sse2.Subtract(ab0ab2Sum.AsInt32(), b0w0bb2w8Sum.AsInt32()); - Sse2.Store(outputPtr, result.AsInt32()); - } + // Load and combine inputs. + Vector128 ina0 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA)); + Vector128 ina1 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps, 16))); + Vector128 ina2 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps * 2, 16))); + Vector128 ina3 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps * 3, 16))).AsInt64(); + Vector128 inb0 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB)); + Vector128 inb1 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps, 16))); + Vector128 inb2 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps * 2, 16))); + Vector128 inb3 = Unsafe.As>(ref MemoryMarshal.GetReference(inputB.Slice(WebpConstants.Bps * 3, 16))).AsInt64(); + + // Combine inA and inB (we'll do two transforms in parallel). 
+ Vector128 inab0 = Sse2.UnpackLow(ina0.AsInt32(), inb0.AsInt32()); + Vector128 inab1 = Sse2.UnpackLow(ina1.AsInt32(), inb1.AsInt32()); + Vector128 inab2 = Sse2.UnpackLow(ina2.AsInt32(), inb2.AsInt32()); + Vector128 inab3 = Sse2.UnpackLow(ina3.AsInt32(), inb3.AsInt32()); + Vector128 tmp0 = Sse41.ConvertToVector128Int16(inab0.AsByte()); + Vector128 tmp1 = Sse41.ConvertToVector128Int16(inab1.AsByte()); + Vector128 tmp2 = Sse41.ConvertToVector128Int16(inab2.AsByte()); + Vector128 tmp3 = Sse41.ConvertToVector128Int16(inab3.AsByte()); + + // a00 a01 a02 a03 b00 b01 b02 b03 + // a10 a11 a12 a13 b10 b11 b12 b13 + // a20 a21 a22 a23 b20 b21 b22 b23 + // a30 a31 a32 a33 b30 b31 b32 b33 + // Vertical pass first to avoid a transpose (vertical and horizontal passes + // are commutative because w/kWeightY is symmetric) and subsequent transpose. + // Calculate a and b (two 4x4 at once). + Vector128 a0 = Sse2.Add(tmp0, tmp2); + Vector128 a1 = Sse2.Add(tmp1, tmp3); + Vector128 a2 = Sse2.Subtract(tmp1, tmp3); + Vector128 a3 = Sse2.Subtract(tmp0, tmp2); + Vector128 b0 = Sse2.Add(a0, a1); + Vector128 b1 = Sse2.Add(a3, a2); + Vector128 b2 = Sse2.Subtract(a3, a2); + Vector128 b3 = Sse2.Subtract(a0, a1); + + // a00 a01 a02 a03 b00 b01 b02 b03 + // a10 a11 a12 a13 b10 b11 b12 b13 + // a20 a21 a22 a23 b20 b21 b22 b23 + // a30 a31 a32 a33 b30 b31 b32 b33 + // Transpose the two 4x4. 
+ Vector128 transpose00 = Sse2.UnpackLow(b0, b1); + Vector128 transpose01 = Sse2.UnpackLow(b2, b3); + Vector128 transpose02 = Sse2.UnpackHigh(b0, b1); + Vector128 transpose03 = Sse2.UnpackHigh(b2, b3); + + // a00 a10 a01 a11 a02 a12 a03 a13 + // a20 a30 a21 a31 a22 a32 a23 a33 + // b00 b10 b01 b11 b02 b12 b03 b13 + // b20 b30 b21 b31 b22 b32 b23 b33 + Vector128 transpose10 = Sse2.UnpackLow(transpose00.AsInt32(), transpose01.AsInt32()); + Vector128 transpose11 = Sse2.UnpackLow(transpose02.AsInt32(), transpose03.AsInt32()); + Vector128 transpose12 = Sse2.UnpackHigh(transpose00.AsInt32(), transpose01.AsInt32()); + Vector128 transpose13 = Sse2.UnpackHigh(transpose02.AsInt32(), transpose03.AsInt32()); + + // a00 a10 a20 a30 a01 a11 a21 a31 + // b00 b10 b20 b30 b01 b11 b21 b31 + // a02 a12 a22 a32 a03 a13 a23 a33 + // b02 b12 a22 b32 b03 b13 b23 b33 + Vector128 output0 = Sse2.UnpackLow(transpose10.AsInt64(), transpose11.AsInt64()); + Vector128 output1 = Sse2.UnpackHigh(transpose10.AsInt64(), transpose11.AsInt64()); + Vector128 output2 = Sse2.UnpackLow(transpose12.AsInt64(), transpose13.AsInt64()); + Vector128 output3 = Sse2.UnpackHigh(transpose12.AsInt64(), transpose13.AsInt64()); + + // a00 a10 a20 a30 b00 b10 b20 b30 + // a01 a11 a21 a31 b01 b11 b21 b31 + // a02 a12 a22 a32 b02 b12 b22 b32 + // a03 a13 a23 a33 b03 b13 b23 b33 + // Horizontal pass and difference of weighted sums. + Vector128 w0 = Unsafe.As>(ref MemoryMarshal.GetReference(w)); + Vector128 w8 = Unsafe.As>(ref MemoryMarshal.GetReference(w.Slice(8, 8))); + + // Calculate a and b (two 4x4 at once). + a0 = Sse2.Add(output0.AsInt16(), output2.AsInt16()); + a1 = Sse2.Add(output1.AsInt16(), output3.AsInt16()); + a2 = Sse2.Subtract(output1.AsInt16(), output3.AsInt16()); + a3 = Sse2.Subtract(output0.AsInt16(), output2.AsInt16()); + b0 = Sse2.Add(a0, a1); + b1 = Sse2.Add(a3, a2); + b2 = Sse2.Subtract(a3, a2); + b3 = Sse2.Subtract(a0, a1); + + // Separate the transforms of inA and inB. 
+ Vector128 ab0 = Sse2.UnpackLow(b0.AsInt64(), b1.AsInt64()); + Vector128 ab2 = Sse2.UnpackLow(b2.AsInt64(), b3.AsInt64()); + Vector128 bb0 = Sse2.UnpackHigh(b0.AsInt64(), b1.AsInt64()); + Vector128 bb2 = Sse2.UnpackHigh(b2.AsInt64(), b3.AsInt64()); + + Vector128 ab0Abs = Ssse3.Abs(ab0.AsInt16()); + Vector128 ab2Abs = Ssse3.Abs(ab2.AsInt16()); + Vector128 b0Abs = Ssse3.Abs(bb0.AsInt16()); + Vector128 bb2Abs = Ssse3.Abs(bb2.AsInt16()); + + // weighted sums. + Vector128 ab0mulw0 = Sse2.MultiplyAddAdjacent(ab0Abs.AsInt16(), w0.AsInt16()); + Vector128 ab2mulw8 = Sse2.MultiplyAddAdjacent(ab2Abs.AsInt16(), w8.AsInt16()); + Vector128 b0mulw0 = Sse2.MultiplyAddAdjacent(b0Abs.AsInt16(), w0.AsInt16()); + Vector128 bb2mulw8 = Sse2.MultiplyAddAdjacent(bb2Abs.AsInt16(), w8.AsInt16()); + Vector128 ab0ab2Sum = Sse2.Add(ab0mulw0, ab2mulw8); + Vector128 b0w0bb2w8Sum = Sse2.Add(b0mulw0, bb2mulw8); + + // difference of weighted sums. + Vector128 result = Sse2.Subtract(ab0ab2Sum.AsInt32(), b0w0bb2w8Sum.AsInt32()); + + ref int outputRef = ref MemoryMarshal.GetReference(sum); + Unsafe.As>(ref outputRef) = result.AsInt32(); return sum[3] + sum[2] + sum[1] + sum[0]; -#pragma warning restore SA1503 // Braces should not be omitted } #endif From d6d1868343831184d94482895e5f4d3837e643cf Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 12:40:27 +0100 Subject: [PATCH 34/36] Test Hadamard transform only with and without HardwareIntrinsics --- tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs index 349a0c8fc..f8b488fde 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -45,13 +45,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP public void HadamardTransform_WithHardwareIntrinsics_Works() => 
FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.AllowAll); [Fact] - public void HadamardTransform_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE2); - - [Fact] - public void HadamardTransform_WithoutSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE41); - - [Fact] - public void HadamardTransform_WithoutSSE2AndSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableSSE41 | HwIntrinsics.DisableSSE2); + public void HadamardTransform_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableHWIntrinsic); #endif } From 42c2cf7a799af7c5a6b504ec6233fc6a7308c030 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 13:40:40 +0100 Subject: [PATCH 35/36] Disable SA1401 in file: Fields should be private --- src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs index 71983055c..2ce383d9e 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs @@ -10,6 +10,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// /// Gets the quantization matrix y1. /// +#pragma warning disable SA1401 // Fields should be private public Vp8Matrix Y1; /// @@ -21,6 +22,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// Gets the quantization matrix uv. /// public Vp8Matrix Uv; +#pragma warning restore SA1401 // Fields should be private /// /// Gets or sets the quant-susceptibility, range [-127,127]. Zero is neutral. Lower values indicate a lower risk of blurriness. 
From 8160a0eeb6a7bb5e8dc65ca1827a754d5a0e1e81 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 13:40:54 +0100 Subject: [PATCH 36/36] Pass Vp8Matrix as ref --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 34 +++++++++---------- .../Formats/WebP/QuantEncTests.cs | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 4c3a2ff5e..97ef27d25 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -315,14 +315,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } Vp8Encoding.FTransformWht(tmp, dcTmp, scratch); - nz |= QuantizeBlock(dcTmp, rd.YDcLevels, dqm.Y2) << 24; + nz |= QuantizeBlock(dcTmp, rd.YDcLevels, ref dqm.Y2) << 24; for (n = 0; n < 16; n += 2) { // Zero-out the first coeff, so that: a) nz is correct below, and // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. tmp[n * 16] = tmp[(n + 1) * 16] = 0; - nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), ref dqm.Y1) << n; } // Transform back. 
@@ -343,7 +343,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy tmp.Clear(); scratch.Clear(); Vp8Encoding.FTransform(src, reference, tmp, scratch); - int nz = QuantizeBlock(tmp, levels, dqm.Y1); + int nz = QuantizeBlock(tmp, levels, ref dqm.Y1); Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch); return nz; @@ -370,11 +370,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy scratch); } - CorrectDcValues(it, dqm.Uv, tmp, rd); + CorrectDcValues(it, ref dqm.Uv, tmp, rd); for (n = 0; n < 8; n += 2) { - nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), ref dqm.Uv) << n; } for (n = 0; n < 8; n += 2) @@ -525,19 +525,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } [MethodImpl(InliningOptions.ShortMethod)] - public static int Quantize2Blocks(Span input, Span output, Vp8Matrix mtx) + public static int Quantize2Blocks(Span input, Span output, ref Vp8Matrix mtx) { - int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), mtx) << 0; - nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), mtx) << 1; + int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), ref mtx) << 0; + nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), ref mtx) << 1; return nz; } - public static int QuantizeBlock(Span input, Span output, Vp8Matrix mtx) + public static int QuantizeBlock(Span input, Span output, ref Vp8Matrix mtx) { #if SUPPORTS_RUNTIME_INTRINSICS if (Sse41.IsSupported) { -#pragma warning disable SA1503 // Braces should not be omitted // Load all inputs. 
Vector128 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); Vector128 input8 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(8, 8))); @@ -624,10 +623,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); - // Detect if all 'out' values are zeroes or not. + // Detect if all 'out' values are zeros or not. Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; -#pragma warning restore SA1503 // Braces should not be omitted } else #endif @@ -675,7 +673,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Quantize as usual, but also compute and return the quantization error. // Error is already divided by DSHIFT. - public static int QuantizeSingle(Span v, Vp8Matrix mtx) + public static int QuantizeSingle(Span v, ref Vp8Matrix mtx) { int v0 = v[0]; bool sign = v0 < 0; @@ -696,7 +694,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return (sign ? 
-v0 : v0) >> DSCALE; } - public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, Span tmp, Vp8ModeScore rd) + public static void CorrectDcValues(Vp8EncIterator it, ref Vp8Matrix mtx, Span tmp, Vp8ModeScore rd) { #pragma warning disable SA1005 // Single line comments should begin with single space // | top[0] | top[1] @@ -713,13 +711,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Span left = it.LeftDerr.AsSpan(ch, 2); Span c = tmp.Slice(ch * 4 * 16, 4 * 16); c[0] += (short)(((C1 * top[0]) + (C2 * left[0])) >> (DSHIFT - DSCALE)); - int err0 = QuantizeSingle(c, mtx); + int err0 = QuantizeSingle(c, ref mtx); c[1 * 16] += (short)(((C1 * top[1]) + (C2 * err0)) >> (DSHIFT - DSCALE)); - int err1 = QuantizeSingle(c.Slice(1 * 16), mtx); + int err1 = QuantizeSingle(c.Slice(1 * 16), ref mtx); c[2 * 16] += (short)(((C1 * err0) + (C2 * left[1])) >> (DSHIFT - DSCALE)); - int err2 = QuantizeSingle(c.Slice(2 * 16), mtx); + int err2 = QuantizeSingle(c.Slice(2 * 16), ref mtx); c[3 * 16] += (short)(((C1 * err1) + (C2 * err2)) >> (DSHIFT - DSCALE)); - int err3 = QuantizeSingle(c.Slice(3 * 16), mtx); + int err3 = QuantizeSingle(c.Slice(3 * 16), ref mtx); rd.Derr[ch, 0] = err1; rd.Derr[ch, 1] = err2; diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs index 7465c42ce..55738199b 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -32,7 +32,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP } // act - int actualResult = QuantEnc.QuantizeBlock(input, output, vp8Matrix); + int actualResult = QuantEnc.QuantizeBlock(input, output, ref vp8Matrix); // assert Assert.True(output.SequenceEqual(expectedOutput));