diff --git a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs index a516ceb5a..c38a82b42 100644 --- a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs +++ b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs @@ -2,8 +2,6 @@ using System.Numerics; using System.Runtime.CompilerServices; -// ReSharper disable InconsistentNaming - namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder { internal abstract partial class JpegColorConverter @@ -55,8 +53,79 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder { } + public static bool IsAvailable => Vector.IsHardwareAccelerated && Vector.Count == 8; + public override void ConvertToRGBA(ComponentValues values, Span result) - { + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount)); + } + + FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder)); + } + + /// + /// SIMD convert using buffers of sizes divisable by 8. + /// + internal static void ConvertCore(ComponentValues values, Span result) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + + ref Vector yBase = + ref Unsafe.As>(ref values.Component0.DangerousGetPinnableReference()); + ref Vector cbBase = + ref Unsafe.As>(ref values.Component1.DangerousGetPinnableReference()); + ref Vector crBase = + ref Unsafe.As>(ref values.Component2.DangerousGetPinnableReference()); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref result.DangerousGetPinnableReference()); + + var chromaOffset = new Vector(-128f); + + // Walking 8 elements at one step: + int n = result.Length / 8; + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + Vector y = Unsafe.Add(ref yBase, i); + Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; + Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + // Vector has no .Clamp(), need to switch to Vector4 for the next operation: + // TODO: Is it worth to use Vector at all? + Vector4Pair rr = Unsafe.As, Vector4Pair>(ref r); + Vector4Pair gg = Unsafe.As, Vector4Pair>(ref g); + Vector4Pair bb = Unsafe.As, Vector4Pair>(ref b); + + rr.RoundAndDownscale(); + gg.RoundAndDownscale(); + bb.RoundAndDownscale(); + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Collect(ref rr, ref gg, ref bb); + } } private struct Vector4Pair @@ -79,21 +148,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder { #pragma warning disable SA1132 // Do not combine fields public Vector4 V0, V1, V2, V3, V4, V5, V6, V7; -#pragma warning restore SA1132 // Do not combine fields - - public static Vector4Octet CreateCollector() - { - var result = default(Vector4Octet); - result.V0.W = 1f; - result.V1.W = 1f; - result.V2.W = 1f; - result.V3.W = 1f; - result.V4.W = 1f; - result.V5.W = 1f; - result.V6.W = 1f; - result.V7.W = 1f; - return result; - } public void Collect(ref Vector4Pair rr, ref Vector4Pair gg, ref Vector4Pair bb) { @@ -138,63 +192,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder this.V7.W = 1f; } } - - internal static void ConvertAligned(ComponentValues values, Span result) - { - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - - ref Vector yBase = - ref Unsafe.As>(ref values.Component0.DangerousGetPinnableReference()); - ref Vector cbBase = - ref Unsafe.As>(ref values.Component1.DangerousGetPinnableReference()); - ref Vector crBase = - ref Unsafe.As>(ref values.Component2.DangerousGetPinnableReference()); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref result.DangerousGetPinnableReference()); - - var chromaOffset = new Vector(-128f); - - int n = result.Length / 8; - - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - Vector y = Unsafe.Add(ref yBase, i); - Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; - Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector r = y + (cr * new Vector(1.402F)); - Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); - Vector b = y + (cb * new Vector(1.772F)); - - // Vector has no .Clamp(), need to switch to Vector4 for the next operation: - // TODO: Is it worth to use Vector at all? - Vector4Pair rr = Unsafe.As, Vector4Pair>(ref r); - Vector4Pair gg = Unsafe.As, Vector4Pair>(ref g); - Vector4Pair bb = Unsafe.As, Vector4Pair>(ref b); - - rr.RoundAndDownscale(); - gg.RoundAndDownscale(); - bb.RoundAndDownscale(); - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Collect(ref rr, ref gg, ref bb); - } - } - - public static bool IsAvailable => Vector.Count == 8; } } } \ No newline at end of file diff --git a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.cs index 0d0b6d3e9..59d695d55 100644 --- a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.cs @@ -108,6 +108,30 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder } } } + + private ComponentValues( + int componentCount, + ReadOnlySpan c0, + ReadOnlySpan c1, + ReadOnlySpan c2, + ReadOnlySpan c3) + { + this.ComponentCount = componentCount; + this.Component0 = c0; + this.Component1 = c1; + this.Component2 = c2; + this.Component3 = c3; + } + + public ComponentValues Slice(int start, int length) + { + ReadOnlySpan c0 = this.Component0.Slice(start, length); + ReadOnlySpan c1 = this.ComponentCount > 1 ? this.Component1.Slice(start, length) : ReadOnlySpan.Empty; + ReadOnlySpan c2 = this.ComponentCount > 2 ? this.Component2.Slice(start, length) : ReadOnlySpan.Empty; + ReadOnlySpan c3 = this.ComponentCount > 3 ? this.Component3.Slice(start, length) : ReadOnlySpan.Empty; + + return new ComponentValues(this.ComponentCount, c0, c1, c2, c3); + } } } } \ No newline at end of file diff --git a/tests/ImageSharp.Sandbox46/Program.cs b/tests/ImageSharp.Sandbox46/Program.cs index 532bf9574..869a720df 100644 --- a/tests/ImageSharp.Sandbox46/Program.cs +++ b/tests/ImageSharp.Sandbox46/Program.cs @@ -41,14 +41,21 @@ namespace SixLabors.ImageSharp.Sandbox46 /// public static void Main(string[] args) { - RunDecodeJpegProfilingTests(); - // RunToVector4ProfilingTest(); + RunJpegColorProfilingTests(); - //RunResizeProfilingTest(); + // RunDecodeJpegProfilingTests(); + // RunToVector4ProfilingTest(); + // RunResizeProfilingTest(); Console.ReadLine(); } + private static void RunJpegColorProfilingTests() + { + new JpegColorConverterTests(new ConsoleOutput()).BenchmarkYCbCr(false); + new JpegColorConverterTests(new ConsoleOutput()).BenchmarkYCbCr(true); + } + private static void RunResizeProfilingTest() { ResizeProfilingBenchmarks test = new ResizeProfilingBenchmarks(new ConsoleOutput()); diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 885a8f809..a414ee977 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -65,9 +65,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromYCbCr(int inputBufferLength, int resultBufferLength, int seed) + public void ConvertFromYCbCrBasic(int inputBufferLength, int resultBufferLength, int seed) { - ValidateConversion(JpegColorSpace.YCbCr, 3, inputBufferLength, resultBufferLength, seed, ValidateYCbCr); + ValidateConversion(new JpegColorConverter.FromYCbCrBasic(), 3, inputBufferLength, resultBufferLength, seed, ValidateYCbCr); } private static void ValidateYCbCr(JpegColorConverter.ComponentValues values, Span result, int i) @@ -81,14 +81,56 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); var expected = ColorSpaceConverter.ToRgb(ycbcr); - Assert.True(actual.AlmostEquals(expected, Precision)); + Assert.True(actual.AlmostEquals(expected, Precision), $"{actual} != {expected}"); Assert.Equal(1, rgba.W); } - [Fact] - public void ConvertFromYCbCr_SimdWithAlignedValues() + [Theory] + [InlineData(64, 1)] + [InlineData(16, 2)] + [InlineData(8, 3)] + public void FromYCbCrSimd256_ConvertCore(int size, int seed) + { + ValidateConversion(JpegColorConverter.FromYCbCrSimd256.ConvertCore, 3, size, size, seed, ValidateYCbCr); + } + + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromYCbCrSimd256(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion(new JpegColorConverter.FromYCbCrSimd256(), 3, inputBufferLength, resultBufferLength, seed, ValidateYCbCr); + } + + [Theory] + [MemberData(nameof(CommonConversionData))] + public void ConvertFromYCbCr_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) { - ValidateConversion(JpegColorConverter.FromYCbCrSimd256.ConvertAligned, 3, 64, 64, 1, ValidateYCbCr); + ValidateConversion(JpegColorSpace.YCbCr, 3, inputBufferLength, resultBufferLength, seed, ValidateYCbCr); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void BenchmarkYCbCr(bool simd) + { + int count = 2053; + int times = 50000; + + JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); + Vector4[] result = new Vector4[count]; + + JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrSimd256() : new JpegColorConverter.FromYCbCrBasic(); + + // Warm up: + converter.ConvertToRGBA(values, result); + + using (new MeasureGuard(this.Output, $"{converter.GetType().Name} x {times}")) + { + for (int i = 0; i < times; i++) + { + converter.ConvertToRGBA(values, result); + } + } } [Theory] @@ -243,7 +285,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Action, int> validatePixelValue) { ValidateConversion( - (v, r) => JpegColorConverter.GetConverter(colorSpace).ConvertToRGBA(v, r), + JpegColorConverter.GetConverter(colorSpace), + componentCount, + inputBufferLength, + resultBufferLength, + seed, + validatePixelValue); + } + + private static void ValidateConversion( + JpegColorConverter converter, + int componentCount, + int inputBufferLength, + int resultBufferLength, + int seed, + Action, int> validatePixelValue) + { + ValidateConversion( + converter.ConvertToRGBA, componentCount, inputBufferLength, resultBufferLength, diff --git a/tests/ImageSharp.Tests/TestUtilities/MeasureFixture.cs b/tests/ImageSharp.Tests/TestUtilities/MeasureFixture.cs index 7725994c4..c892c09de 100644 --- a/tests/ImageSharp.Tests/TestUtilities/MeasureFixture.cs +++ b/tests/ImageSharp.Tests/TestUtilities/MeasureFixture.cs @@ -56,4 +56,27 @@ namespace SixLabors.ImageSharp.Tests protected ITestOutputHelper Output { get; } } + + public class MeasureGuard : IDisposable + { + private readonly string operation; + + private readonly Stopwatch stopwatch = new Stopwatch(); + + public MeasureGuard(ITestOutputHelper output, string operation) + { + this.operation = operation; + this.Output = output; + this.Output.WriteLine(operation + " ..."); + this.stopwatch.Start(); + } + + private ITestOutputHelper Output { get; } + + public void Dispose() + { + this.stopwatch.Stop(); + this.Output.WriteLine($"{this.operation} completed in {this.stopwatch.ElapsedMilliseconds}ms"); + } + } } \ No newline at end of file