diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index 3039eb326..12d66ed94 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -6,6 +6,10 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp { @@ -28,7 +32,7 @@ namespace SixLabors.ImageSharp [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector4 PseudoRound(this Vector4 v) { - var sign = Vector4Utilities.FastClamp(v, new Vector4(-1), new Vector4(1)); + Vector4 sign = Vector4Utilities.FastClamp(v, new Vector4(-1), new Vector4(1)); return v + (sign * 0.5f); } @@ -44,13 +48,24 @@ namespace SixLabors.ImageSharp [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector FastRound(this Vector v) { - var magic0 = new Vector(int.MinValue); // 0x80000000 - Vector sgn0 = Vector.AsVectorSingle(magic0); - Vector and0 = Vector.BitwiseAnd(sgn0, v); - Vector or0 = Vector.BitwiseOr(and0, new Vector(8388608.0f)); - Vector add0 = Vector.Add(v, or0); - Vector sub0 = Vector.Subtract(add0, or0); - return sub0; +#if SUPPORTS_RUNTIME_INTRINSICS + + if (Avx.IsSupported) + { + ref Vector256 v256 = ref Unsafe.As, Vector256>(ref v); + Vector256 vRound = Avx.RoundToNearestInteger(v256); + return Unsafe.As, Vector>(ref vRound); + } + else +#endif + { + var magic0 = new Vector(int.MinValue); // 0x80000000 + var sgn0 = Vector.AsVectorSingle(magic0); + var and0 = Vector.BitwiseAnd(sgn0, v); + var or0 = Vector.BitwiseOr(and0, new Vector(8388608.0f)); + var add0 = Vector.Add(v, or0); + return Vector.Subtract(add0, or0); + } } /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanBuffer.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanBuffer.cs index 774780170..12ea39e37 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanBuffer.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanBuffer.cs @@ -93,25 +93,24 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder public unsafe int DecodeHuffman(ref HuffmanTable h) { this.CheckBits(); - int v = this.PeekBits(JpegConstants.Huffman.LookupBits); - int symbol = h.LookaheadValue[v]; - int size = h.LookaheadSize[v]; + int index = this.PeekBits(JpegConstants.Huffman.LookupBits); + int size = h.LookaheadSize[index]; - if (size == JpegConstants.Huffman.SlowBits) + if (size < JpegConstants.Huffman.SlowBits) { - ulong x = this.data << (JpegConstants.Huffman.RegisterSize - this.remainingBits); - while (x > h.MaxCode[size]) - { - size++; - } + this.remainingBits -= size; + return h.LookaheadValue[index]; + } - v = (int)(x >> (JpegConstants.Huffman.RegisterSize - size)); - symbol = h.Values[(h.ValOffset[size] + v) & 0xFF]; + ulong x = this.data << (JpegConstants.Huffman.RegisterSize - this.remainingBits); + while (x > h.MaxCode[size]) + { + size++; } this.remainingBits -= size; - return symbol; + return h.Values[(h.ValOffset[size] + (int)(x >> (JpegConstants.Huffman.RegisterSize - size))) & 0xFF]; } [MethodImpl(InliningOptions.ShortMethod)] diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs index 8c597a8c5..0e68af87a 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs @@ -1,7 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -using System.Drawing; using System.IO; using BenchmarkDotNet.Attributes; @@ -15,7 +14,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg [Config(typeof(Config.ShortClr))] public class DecodeJpegParseStreamOnly { - [Params(TestImages.Jpeg.Baseline.Jpeg420Exif)] + [Params(TestImages.Jpeg.BenchmarkSuite.Lake_Small444YCbCr)] public string TestImage { get; set; } private string TestImageFullPath => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, this.TestImage); @@ -37,7 +36,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg } [Benchmark(Description = "JpegDecoderCore.ParseStream")] - public void ParseStreamPdfJs() + public void ParseStream() { using var memoryStream = new MemoryStream(this.jpegBytes); using var bufferedStream = new BufferedReadStream(Configuration.Default, memoryStream); @@ -46,22 +45,18 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg decoder.ParseStream(bufferedStream); decoder.Dispose(); } - - // RESULTS (2019 April 23): - // - // BenchmarkDotNet=v0.11.3, OS=Windows 10.0.17763.437 (1809/October2018Update/Redstone5) - // Intel Core i7-6600U CPU 2.60GHz (Skylake), 1 CPU, 4 logical and 2 physical cores - // .NET Core SDK=2.2.202 - // [Host] : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT - // Clr : .NET Framework 4.7.2 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3362.0 - // Core : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT - // - // | Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |---------------------------- |----- |-------- |--------------------- |---------:|---------:|----------:|------:|--------:|---------:|------:|------:|----------:| - // | 'System.Drawing FULL' | Clr | Clr | Jpg/b(...)f.jpg [28] | 18.69 ms | 8.273 ms | 0.4535 ms | 1.00 | 0.00 | 343.7500 | - | - | 757.89 KB | - // | JpegDecoderCore.ParseStream | Clr | Clr | Jpg/b(...)f.jpg [28] | 15.76 ms | 4.266 ms | 0.2339 ms | 0.84 | 0.03 | - | - | - | 11.83 KB | - // | | | | | | | | | | | | | | - // | 'System.Drawing FULL' | Core | Core | Jpg/b(...)f.jpg [28] | 17.68 ms | 2.711 ms | 0.1486 ms | 1.00 | 0.00 | 343.7500 | - | - | 757.04 KB | - // | JpegDecoderCore.ParseStream | Core | Core | Jpg/b(...)f.jpg [28] | 14.27 ms | 3.671 ms | 0.2012 ms | 0.81 | 0.00 | - | - | - | 11.76 KB | } + + /* + | Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | + |---------------------------- |----------- |-------------- |--------------------- |---------:|----------:|----------:|------:|--------:|------:|------:|----------:| + | 'System.Drawing FULL' | Job-HITJFX | .NET 4.7.2 | Jpg/b(...)e.jpg [21] | 5.828 ms | 0.9885 ms | 0.0542 ms | 1.00 | 46.8750 | - | - | 211566 B | + | JpegDecoderCore.ParseStream | Job-HITJFX | .NET 4.7.2 | Jpg/b(...)e.jpg [21] | 5.833 ms | 0.2923 ms | 0.0160 ms | 1.00 | - | - | - | 12416 B | + | | | | | | | | | | | | | + | 'System.Drawing FULL' | Job-WPSKZD | .NET Core 2.1 | Jpg/b(...)e.jpg [21] | 6.018 ms | 2.1374 ms | 0.1172 ms | 1.00 | 46.8750 | - | - | 210768 B | + | JpegDecoderCore.ParseStream | Job-WPSKZD | .NET Core 2.1 | Jpg/b(...)e.jpg [21] | 4.382 ms | 0.9009 ms | 0.0494 ms | 0.73 | - | - | - | 12360 B | + | | | | | | | | | | | | | + | 'System.Drawing FULL' | Job-ZLSNRP | .NET Core 3.1 | Jpg/b(...)e.jpg [21] | 5.714 ms | 0.4078 ms | 0.0224 ms | 1.00 | - | - | - | 176 B | + | JpegDecoderCore.ParseStream | Job-ZLSNRP | .NET Core 3.1 | Jpg/b(...)e.jpg [21] | 4.239 ms | 1.0943 ms | 0.0600 ms | 0.74 | - | - | - | 12406 B | + */ } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg_ImageSpecific.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg_ImageSpecific.cs index 620a4d5ed..4b1ee81a4 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg_ImageSpecific.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg_ImageSpecific.cs @@ -90,45 +90,17 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg } } - // RESULTS (2018 November 4): - // - // BenchmarkDotNet=v0.10.14, OS=Windows 10.0.17134 - // Intel Core i7-7700HQ CPU 2.80GHz (Kaby Lake), 1 CPU, 8 logical and 4 physical cores - // Frequency=2742191 Hz, Resolution=364.6719 ns, Timer=TSC - // .NET Core SDK=2.1.403 - // [Host] : .NET Core 2.1.5 (CoreCLR 4.6.26919.02, CoreFX 4.6.26919.02), 64bit RyuJIT - // - // Method | TestImage | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // ------------------------------- |-------------------------------------------- |-----------:|-----------:|----------:|-------:|---------:|----------:|---------:|---------:|------------:| - // 'Decode Jpeg - System.Drawing' | Jpg/baseline/Lake.jpg | 6.117 ms | 0.3923 ms | 0.0222 ms | 1.00 | 0.00 | 62.5000 | - | - | 205.83 KB | - // 'Decode Jpeg - ImageSharp' | Jpg/baseline/Lake.jpg | 18.126 ms | 0.6023 ms | 0.0340 ms | 2.96 | 0.01 | - | - | - | 19.97 KB | - // | | | | | | | | | | | - // 'Decode Jpeg - System.Drawing' | Jpg/baseline/jpeg420exif.jpg | 17.063 ms | 2.6096 ms | 0.1474 ms | 1.00 | 0.00 | 218.7500 | - | - | 757.04 KB | - // 'Decode Jpeg - ImageSharp' | Jpg/baseline/jpeg420exif.jpg | 41.366 ms | 1.0115 ms | 0.0572 ms | 2.42 | 0.02 | - | - | - | 21.94 KB | - // | | | | | | | | | | | - // 'Decode Jpeg - System.Drawing' | Jpg/issues/Issue518-Bad-RST-Progressive.jpg | 428.282 ms | 94.9163 ms | 5.3629 ms | 1.00 | 0.00 | 2375.0000 | - | - | 7403.76 KB | - // 'Decode Jpeg - ImageSharp' | Jpg/issues/Issue518-Bad-RST-Progressive.jpg | 386.698 ms | 33.0065 ms | 1.8649 ms | 0.90 | 0.01 | 125.0000 | 125.0000 | 125.0000 | 35186.97 KB | - // | | | | | | | | | | | - // 'Decode Jpeg - System.Drawing' | Jpg/issues/issue750-exif-tranform.jpg | 95.192 ms | 3.1762 ms | 0.1795 ms | 1.00 | 0.00 | 1750.0000 | - | - | 5492.63 KB | - // 'Decode Jpeg - ImageSharp' | Jpg/issues/issue750-exif-tranform.jpg | 230.158 ms | 48.8128 ms | 2.7580 ms | 2.42 | 0.02 | 312.5000 | 312.5000 | 312.5000 | 58834.66 KB | - - // RESULTS (2019 April 23): - // - // BenchmarkDotNet=v0.11.5, OS=Windows 10.0.17763.437 (1809/October2018Update/Redstone5) - // Intel Core i7-6600U CPU 2.60GHz (Skylake), 1 CPU, 4 logical and 2 physical cores - // .NET Core SDK=2.2.202 - // [Host] : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT - // Core : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT - // - // | Method | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |------------------------------- |--------------------- |-----------:|-----------:|-----------:|------:|--------:|----------:|------:|------:|------------:| - // | 'Decode Jpeg - System.Drawing' | Jpg/b(...)e.jpg [21] | 6.957 ms | 9.618 ms | 0.5272 ms | 1.00 | 0.00 | 93.7500 | - | - | 205.83 KB | - // | 'Decode Jpeg - ImageSharp' | Jpg/b(...)e.jpg [21] | 18.348 ms | 8.876 ms | 0.4865 ms | 2.65 | 0.23 | - | - | - | 14.49 KB | - // | | | | | | | | | | | | - // | 'Decode Jpeg - System.Drawing' | Jpg/b(...)f.jpg [28] | 18.687 ms | 11.632 ms | 0.6376 ms | 1.00 | 0.00 | 343.7500 | - | - | 757.04 KB | - // | 'Decode Jpeg - ImageSharp' | Jpg/b(...)f.jpg [28] | 41.990 ms | 25.514 ms | 1.3985 ms | 2.25 | 0.10 | - | - | - | 15.48 KB | - // | | | | | | | | | | | | - // | 'Decode Jpeg - System.Drawing' | Jpg/i(...)e.jpg [43] | 477.265 ms | 732.126 ms | 40.1303 ms | 1.00 | 0.00 | 3000.0000 | - | - | 7403.76 KB | - // | 'Decode Jpeg - ImageSharp' | Jpg/i(...)e.jpg [43] | 348.545 ms | 91.480 ms | 5.0143 ms | 0.73 | 0.06 | - | - | - | 35177.21 KB | + /* + | Method | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + |------------------------------- |--------------------- |-----------:|------------:|-----------:|------:|--------:|------:|------:|------:|-----------:| + | 'Decode Jpeg - System.Drawing' | Jpg/b(...)e.jpg [21] | 5.122 ms | 1.3978 ms | 0.0766 ms | 1.00 | 0.00 | - | - | - | 176 B | + | 'Decode Jpeg - ImageSharp' | Jpg/b(...)e.jpg [21] | 11.991 ms | 0.2514 ms | 0.0138 ms | 2.34 | 0.03 | - | - | - | 15816 B | + | | | | | | | | | | | | + | 'Decode Jpeg - System.Drawing' | Jpg/b(...)f.jpg [28] | 14.943 ms | 1.8410 ms | 0.1009 ms | 1.00 | 0.00 | - | - | - | 176 B | + | 'Decode Jpeg - ImageSharp' | Jpg/b(...)f.jpg [28] | 29.759 ms | 1.5452 ms | 0.0847 ms | 1.99 | 0.01 | - | - | - | 16824 B | + | | | | | | | | | | | | + | 'Decode Jpeg - System.Drawing' | Jpg/i(...)e.jpg [43] | 388.229 ms | 382.8946 ms | 20.9877 ms | 1.00 | 0.00 | - | - | - | 216 B | + | 'Decode Jpeg - ImageSharp' | Jpg/i(...)e.jpg [43] | 276.490 ms | 195.5104 ms | 10.7166 ms | 0.71 | 0.01 | - | - | - | 36022368 B | + */ } }