From ee901a0277e688a768eaad7304e1cf31afcd2c3f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 18 May 2020 16:51:09 +0100 Subject: [PATCH] Update Crc32 based on feedback --- src/ImageSharp/Formats/Png/Zlib/Adler32.cs | 1 + src/ImageSharp/Formats/Png/Zlib/Crc32.cs | 36 +++++++----- .../General/Adler32Benchmark.cs | 56 +++++++++---------- .../General/Crc32Benchmark.cs | 56 +++++++++---------- 4 files changed, 78 insertions(+), 71 deletions(-) diff --git a/src/ImageSharp/Formats/Png/Zlib/Adler32.cs b/src/ImageSharp/Formats/Png/Zlib/Adler32.cs index dc8b7ad0d..3d41c6b82 100644 --- a/src/ImageSharp/Formats/Png/Zlib/Adler32.cs +++ b/src/ImageSharp/Formats/Png/Zlib/Adler32.cs @@ -31,6 +31,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib #if SUPPORTS_RUNTIME_INTRINSICS private const int MinBufferSize = 64; + // The C# compiler emits this as a compile-time constant embedded in the PE file. private static ReadOnlySpan Tap1Tap2 => new byte[] { 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, // tap1 diff --git a/src/ImageSharp/Formats/Png/Zlib/Crc32.cs b/src/ImageSharp/Formats/Png/Zlib/Crc32.cs index ad047a41d..b25c042e1 100644 --- a/src/ImageSharp/Formats/Png/Zlib/Crc32.cs +++ b/src/ImageSharp/Formats/Png/Zlib/Crc32.cs @@ -12,8 +12,8 @@ using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.Formats.Png.Zlib { /// - /// Calculates the 32 bit Cyclic Redundancy Check (CRC) checksum of a given buffer according to the - /// IEEE 802.3 specification. + /// Calculates the 32 bit Cyclic Redundancy Check (CRC) checksum of a given buffer + /// according to the IEEE 802.3 specification. /// internal static partial class Crc32 { @@ -28,10 +28,13 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib // Definitions of the bit-reflected domain constants k1, k2, k3, etc and // the CRC32+Barrett polynomials given at the end of the paper. - private static ulong[] k1k2 = { 0x0154442bd4, 0x01c6e41596 }; - private static ulong[] k3k4 = { 0x01751997d0, 0x00ccaa009e }; - private static ulong[] k5k0 = { 0x0163cd6124, 0x0000000000 }; - private static ulong[] poly = { 0x01db710641, 0x01f7011641 }; + private static readonly ulong[] K05Poly = + { + 0x0154442bd4, 0x01c6e41596, // k1, k2 + 0x01751997d0, 0x00ccaa009e, // k3, k4 + 0x0163cd6124, 0x0000000000, // k5, k0 + 0x01db710641, 0x01f7011641 // polynomial + }; #endif /// @@ -79,13 +82,11 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib int chunksize = buffer.Length & ~ChunksizeMask; int length = chunksize; - fixed (byte* bufferPtr = &buffer[0]) - fixed (ulong* k1k2Ptr = &k1k2[0]) - fixed (ulong* k3k4Ptr = &k3k4[0]) - fixed (ulong* k5k0Ptr = &k5k0[0]) - fixed (ulong* polyPtr = &poly[0]) + fixed (byte* bufferPtr = buffer) + fixed (ulong* k05PolyPtr = K05Poly) { byte* localBufferPtr = bufferPtr; + ulong* localK05PolyPtr = k05PolyPtr; // There's at least one block of 64. Vector128 x1 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00)); @@ -95,7 +96,9 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib Vector128 x5; x1 = Sse2.Xor(x1, Sse2.ConvertScalarToVector128UInt32(crc).AsUInt64()); - Vector128 x0 = Sse2.LoadVector128(k1k2Ptr); + + // k1, k2 + Vector128 x0 = Sse2.LoadVector128(localK05PolyPtr + 0x0); localBufferPtr += 64; length -= 64; @@ -133,7 +136,8 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib } // Fold into 128-bits. - x0 = Sse2.LoadVector128(k3k4Ptr); + // k3, k4 + x0 = Sse2.LoadVector128(k05PolyPtr + 0x2); x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00); x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11); @@ -170,7 +174,8 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib x1 = Sse2.ShiftRightLogical128BitLane(x1, 8); x1 = Sse2.Xor(x1, x2); - x0 = Sse2.LoadScalarVector128(k5k0Ptr); + // k5, k0 + x0 = Sse2.LoadScalarVector128(localK05PolyPtr + 0x4); x2 = Sse2.ShiftRightLogical128BitLane(x1, 4); x1 = Sse2.And(x1, x3); @@ -178,7 +183,8 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib x1 = Sse2.Xor(x1, x2); // Barret reduce to 32-bits. - x0 = Sse2.LoadVector128(polyPtr); + // polynomial + x0 = Sse2.LoadVector128(localK05PolyPtr + 0x6); x2 = Sse2.And(x1, x3); x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x10); diff --git a/tests/ImageSharp.Benchmarks/General/Adler32Benchmark.cs b/tests/ImageSharp.Benchmarks/General/Adler32Benchmark.cs index 5a4a51325..37144bd94 100644 --- a/tests/ImageSharp.Benchmarks/General/Adler32Benchmark.cs +++ b/tests/ImageSharp.Benchmarks/General/Adler32Benchmark.cs @@ -41,32 +41,32 @@ namespace SixLabors.ImageSharp.Benchmarks.General // ########## 17/05/2020 ########## // - // | Method | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |--------------------- |-------------- |------ |------------:|-------------:|-----------:|------:|--------:|------:|------:|------:|----------:| - // | SharpZipLibCalculate | .NET 4.7.2 | 1024 | 847.94 ns | 180.284 ns | 9.882 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET 4.7.2 | 1024 | 458.80 ns | 146.235 ns | 8.016 ns | 0.54 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 2.1 | 1024 | 817.11 ns | 31.211 ns | 1.711 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 2.1 | 1024 | 421.48 ns | 86.149 ns | 4.722 ns | 0.52 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 3.1 | 1024 | 879.38 ns | 37.804 ns | 2.072 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 3.1 | 1024 | 57.27 ns | 2.008 ns | 0.110 ns | 0.07 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET 4.7.2 | 2048 | 1,660.62 ns | 46.912 ns | 2.571 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET 4.7.2 | 2048 | 938.41 ns | 3,137.008 ns | 171.950 ns | 0.57 | 0.10 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 2.1 | 2048 | 1,616.69 ns | 172.974 ns | 9.481 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 2.1 | 2048 | 871.52 ns | 485.678 ns | 26.622 ns | 0.54 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 3.1 | 2048 | 1,746.34 ns | 110.539 ns | 6.059 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 3.1 | 2048 | 96.31 ns | 24.491 ns | 1.342 ns | 0.06 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET 4.7.2 | 4096 | 3,102.18 ns | 484.204 ns | 26.541 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET 4.7.2 | 4096 | 1,729.49 ns | 104.446 ns | 5.725 ns | 0.56 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 2.1 | 4096 | 3,251.55 ns | 607.086 ns | 33.276 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 2.1 | 4096 | 1,669.22 ns | 25.194 ns | 1.381 ns | 0.51 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 3.1 | 4096 | 3,514.15 ns | 719.548 ns | 39.441 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 3.1 | 4096 | 180.12 ns | 55.425 ns | 3.038 ns | 0.05 | 0.00 | - | - | - | - | + // | Method | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------- |-------------- |------ |------------:|------------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | SharpZipLibCalculate | .NET 4.7.2 | 1024 | 793.18 ns | 775.66 ns | 42.516 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET 4.7.2 | 1024 | 384.86 ns | 15.64 ns | 0.857 ns | 0.49 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 2.1 | 1024 | 790.31 ns | 353.34 ns | 19.368 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 2.1 | 1024 | 465.28 ns | 652.41 ns | 35.761 ns | 0.59 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 3.1 | 1024 | 877.25 ns | 97.89 ns | 5.365 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 3.1 | 1024 | 45.60 ns | 13.28 ns | 0.728 ns | 0.05 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET 4.7.2 | 2048 | 1,537.04 ns | 428.44 ns | 23.484 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET 4.7.2 | 2048 | 849.76 ns | 1,066.34 ns | 58.450 ns | 0.55 | 0.04 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 2.1 | 2048 | 1,616.97 ns | 276.70 ns | 15.167 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 2.1 | 2048 | 790.77 ns | 691.71 ns | 37.915 ns | 0.49 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 3.1 | 2048 | 1,735.11 ns | 1,374.22 ns | 75.325 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 3.1 | 2048 | 87.80 ns | 56.84 ns | 3.116 ns | 0.05 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET 4.7.2 | 4096 | 3,054.53 ns | 796.41 ns | 43.654 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET 4.7.2 | 4096 | 1,538.90 ns | 487.02 ns | 26.695 ns | 0.50 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 2.1 | 4096 | 3,223.48 ns | 32.32 ns | 1.771 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 2.1 | 4096 | 1,547.60 ns | 309.72 ns | 16.977 ns | 0.48 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 3.1 | 4096 | 3,672.33 ns | 1,095.81 ns | 60.065 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 3.1 | 4096 | 159.44 ns | 36.31 ns | 1.990 ns | 0.04 | 0.00 | - | - | - | - | } diff --git a/tests/ImageSharp.Benchmarks/General/Crc32Benchmark.cs b/tests/ImageSharp.Benchmarks/General/Crc32Benchmark.cs index 4bd273b30..7f85d5aad 100644 --- a/tests/ImageSharp.Benchmarks/General/Crc32Benchmark.cs +++ b/tests/ImageSharp.Benchmarks/General/Crc32Benchmark.cs @@ -41,32 +41,32 @@ namespace SixLabors.ImageSharp.Benchmarks.General // ########## 17/05/2020 ########## // - // | Method | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |--------------------- |-------------- |------ |-------------:|-------------:|-------------:|------:|--------:|------:|------:|------:|----------:| - // | SharpZipLibCalculate | .NET 4.7.2 | 1024 | 3,067.24 ns | 769.25 ns | 42.165 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET 4.7.2 | 1024 | 2,546.86 ns | 1,106.36 ns | 60.643 ns | 0.83 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 2.1 | 1024 | 3,377.15 ns | 3,903.41 ns | 213.959 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 2.1 | 1024 | 2,524.25 ns | 2,220.97 ns | 121.739 ns | 0.75 | 0.04 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 3.1 | 1024 | 3,980.60 ns | 8,497.37 ns | 465.769 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 3.1 | 1024 | 78.68 ns | 69.82 ns | 3.827 ns | 0.02 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET 4.7.2 | 2048 | 7,934.29 ns | 42,550.13 ns | 2,332.316 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET 4.7.2 | 2048 | 5,437.81 ns | 12,760.51 ns | 699.447 ns | 0.71 | 0.10 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 2.1 | 2048 | 6,008.05 ns | 621.37 ns | 34.059 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 2.1 | 2048 | 4,791.50 ns | 3,894.94 ns | 213.495 ns | 0.80 | 0.04 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 3.1 | 2048 | 5,900.06 ns | 1,344.70 ns | 73.707 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 3.1 | 2048 | 103.12 ns | 15.66 ns | 0.859 ns | 0.02 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET 4.7.2 | 4096 | 12,422.59 ns | 1,308.01 ns | 71.696 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET 4.7.2 | 4096 | 10,524.63 ns | 6,267.56 ns | 343.546 ns | 0.85 | 0.03 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 2.1 | 4096 | 11,888.00 ns | 1,059.25 ns | 58.061 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 2.1 | 4096 | 9,806.24 ns | 241.91 ns | 13.260 ns | 0.82 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | - // | SharpZipLibCalculate | .NET Core 3.1 | 4096 | 12,181.28 ns | 1,974.68 ns | 108.239 ns | 1.00 | 0.00 | - | - | - | - | - // | SixLaborsCalculate | .NET Core 3.1 | 4096 | 192.39 ns | 10.27 ns | 0.563 ns | 0.02 | 0.00 | - | - | - | - | + // | Method | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------- |-------------- |------ |-------------:|-------------:|-----------:|------:|--------:|------:|------:|------:|----------:| + // | SharpZipLibCalculate | .NET 4.7.2 | 1024 | 2,797.77 ns | 278.697 ns | 15.276 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET 4.7.2 | 1024 | 2,275.56 ns | 216.100 ns | 11.845 ns | 0.81 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 2.1 | 1024 | 2,923.43 ns | 2,656.882 ns | 145.633 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 2.1 | 1024 | 2,257.79 ns | 75.081 ns | 4.115 ns | 0.77 | 0.04 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 3.1 | 1024 | 2,764.14 ns | 86.281 ns | 4.729 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 3.1 | 1024 | 49.32 ns | 1.813 ns | 0.099 ns | 0.02 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET 4.7.2 | 2048 | 5,603.71 ns | 427.240 ns | 23.418 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET 4.7.2 | 2048 | 4,525.02 ns | 33.931 ns | 1.860 ns | 0.81 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 2.1 | 2048 | 5,563.32 ns | 49.337 ns | 2.704 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 2.1 | 2048 | 4,519.61 ns | 29.837 ns | 1.635 ns | 0.81 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 3.1 | 2048 | 5,543.37 ns | 518.551 ns | 28.424 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 3.1 | 2048 | 89.07 ns | 3.312 ns | 0.182 ns | 0.02 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET 4.7.2 | 4096 | 11,396.95 ns | 373.450 ns | 20.470 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET 4.7.2 | 4096 | 9,070.35 ns | 271.083 ns | 14.859 ns | 0.80 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 2.1 | 4096 | 11,127.81 ns | 239.177 ns | 13.110 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 2.1 | 4096 | 9,050.46 ns | 230.916 ns | 12.657 ns | 0.81 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | + // | SharpZipLibCalculate | .NET Core 3.1 | 4096 | 11,098.62 ns | 687.978 ns | 37.710 ns | 1.00 | 0.00 | - | - | - | - | + // | SixLaborsCalculate | .NET Core 3.1 | 4096 | 168.11 ns | 3.633 ns | 0.199 ns | 0.02 | 0.00 | - | - | - | - | }