diff --git a/src/ImageSharp/Formats/Png/Zlib/Adler32.cs b/src/ImageSharp/Formats/Png/Zlib/Adler32.cs index bd354a508..dc8b7ad0d 100644 --- a/src/ImageSharp/Formats/Png/Zlib/Adler32.cs +++ b/src/ImageSharp/Formats/Png/Zlib/Adler32.cs @@ -3,12 +3,12 @@ using System; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; #endif +#pragma warning disable IDE0007 // Use implicit type namespace SixLabors.ImageSharp.Formats.Png.Zlib { /// @@ -22,16 +22,22 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib /// public const uint SeedValue = 1U; -#if SUPPORTS_RUNTIME_INTRINSICS - private const int MinBufferSize = 64; -#endif - // Largest prime smaller than 65536 private const uint BASE = 65521; // NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 private const uint NMAX = 5552; +#if SUPPORTS_RUNTIME_INTRINSICS + private const int MinBufferSize = 64; + + private static ReadOnlySpan Tap1Tap2 => new byte[] + { + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, // tap1 + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 // tap2 + }; +#endif + /// /// Calculates the Adler32 checksum with the bytes taken from the span. 
/// @@ -83,14 +89,15 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib length -= blocks * BLOCK_SIZE; int index = 0; - fixed (byte* bufferPtr = &buffer[0]) + fixed (byte* bufferPtr = buffer) + fixed (byte* tapPtr = Tap1Tap2) { index += (int)blocks * BLOCK_SIZE; var localBufferPtr = bufferPtr; // _mm_setr_epi8 on x86 - var tap1 = Vector128.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17); - var tap2 = Vector128.Create(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + Vector128 tap1 = Sse2.LoadVector128((sbyte*)tapPtr); + Vector128 tap2 = Sse2.LoadVector128((sbyte*)(tapPtr + 0x10)); Vector128 zero = Vector128.Zero; var ones = Vector128.Create((short)1); @@ -106,28 +113,28 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib // Process n blocks of data. At most NMAX data bytes can be // processed before s2 must be reduced modulo BASE. - Vector128 v_ps = Vector128.CreateScalar(s1 * n).AsInt32(); - Vector128 v_s2 = Vector128.CreateScalar(s2).AsInt32(); - Vector128 v_s1 = Vector128.Zero; + Vector128 v_ps = Vector128.CreateScalar(s1 * n); + Vector128 v_s2 = Vector128.CreateScalar(s2); + Vector128 v_s1 = Vector128.Zero; do { // Load 32 input bytes. Vector128 bytes1 = Sse3.LoadDquVector128(localBufferPtr); - Vector128 bytes2 = Sse3.LoadDquVector128(localBufferPtr + 16); + Vector128 bytes2 = Sse3.LoadDquVector128(localBufferPtr + 0x10); // Add previous block byte sum to v_ps. v_ps = Sse2.Add(v_ps, v_s1); // Horizontally add the bytes for s1, multiply-adds the // bytes by [ 32, 31, 30, ... ] for s2. 
- v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsInt32()); + v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsUInt32()); Vector128 mad1 = Ssse3.MultiplyAddAdjacent(bytes1, tap1); - v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones)); + v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones).AsUInt32()); - v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsInt32()); + v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsUInt32()); Vector128 mad2 = Ssse3.MultiplyAddAdjacent(bytes2, tap2); - v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones)); + v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones).AsUInt32()); localBufferPtr += BLOCK_SIZE; } @@ -139,148 +146,114 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib const byte S2301 = 0b1011_0001; // A B C D -> B A D C const byte S1032 = 0b0100_1110; // A B C D -> C D A B - v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S2301)); v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S1032)); - s1 += (uint)v_s1.ToScalar(); + s1 += v_s1.ToScalar(); v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S2301)); v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S1032)); - s2 = (uint)v_s2.ToScalar(); + s2 = v_s2.ToScalar(); // Reduce. 
s1 %= BASE; s2 %= BASE; } - } - - ref byte bufferRef = ref MemoryMarshal.GetReference(buffer); - if (length > 0) - { - if (length >= 16) + if (length > 0) { - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - s1 += Unsafe.Add(ref bufferRef, index++); - s2 += s1; - length -= 16; - } + if (length >= 16) + { + s2 += s1 += localBufferPtr[0]; + s2 += s1 += localBufferPtr[1]; + s2 += s1 += localBufferPtr[2]; + s2 += s1 += localBufferPtr[3]; + s2 += s1 += localBufferPtr[4]; + s2 += s1 += localBufferPtr[5]; + s2 += s1 += localBufferPtr[6]; + s2 += s1 += localBufferPtr[7]; + s2 += s1 += localBufferPtr[8]; + s2 += s1 += localBufferPtr[9]; + s2 += s1 += localBufferPtr[10]; + s2 += s1 += localBufferPtr[11]; + s2 += s1 += localBufferPtr[12]; + s2 += s1 += localBufferPtr[13]; + s2 += s1 += localBufferPtr[14]; + s2 += s1 += localBufferPtr[15]; + + localBufferPtr += 16; + length -= 16; + } - while (length-- > 0) - { - s2 += s1 += Unsafe.Add(ref bufferRef, index++); - } + while (length-- > 0) + { + s2 += s1 += *localBufferPtr++; + } - if (s1 >= BASE) - { - s1 -= BASE; + if (s1 >= BASE) + { + s1 -= BASE; + } + + s2 %= BASE; } - s2 %= BASE; + return s1 | (s2 << 16); } - - return s1 | (s2 << 16); } #endif 
/// <summary>
/// Calculates the Adler32 checksum of the given bytes using plain scalar arithmetic.
/// </summary>
/// <param name="adler">
/// The checksum to continue from. Its low 16 bits are used as the running byte sum (s1)
/// and its high 16 bits as the running sum of sums (s2).
/// </param>
/// <param name="buffer">The bytes to fold into the checksum.</param>
/// <returns>The updated checksum, packed as <c>(s2 &lt;&lt; 16) | s1</c>.</returns>
[MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)]
private static unsafe uint CalculateScalar(uint adler, ReadOnlySpan<byte> buffer)
{
    uint s1 = adler & 0xFFFF;
    uint s2 = (adler >> 16) & 0xFFFF;

    // For an empty span no byte is ever dereferenced: length is 0 and both loops are skipped.
    fixed (byte* bufferPtr = buffer)
    {
        byte* localBufferPtr = bufferPtr;
        uint length = (uint)buffer.Length;

        while (length > 0)
        {
            // Consume at most NMAX bytes per pass; the sums are reduced modulo BASE
            // at the end of every pass, before the next chunk is accumulated.
            uint k = length < NMAX ? length : NMAX;
            length -= k;

            // Manually unrolled: 16 bytes per iteration.
            while (k >= 16)
            {
                s2 += s1 += localBufferPtr[0];
                s2 += s1 += localBufferPtr[1];
                s2 += s1 += localBufferPtr[2];
                s2 += s1 += localBufferPtr[3];
                s2 += s1 += localBufferPtr[4];
                s2 += s1 += localBufferPtr[5];
                s2 += s1 += localBufferPtr[6];
                s2 += s1 += localBufferPtr[7];
                s2 += s1 += localBufferPtr[8];
                s2 += s1 += localBufferPtr[9];
                s2 += s1 += localBufferPtr[10];
                s2 += s1 += localBufferPtr[11];
                s2 += s1 += localBufferPtr[12];
                s2 += s1 += localBufferPtr[13];
                s2 += s1 += localBufferPtr[14];
                s2 += s1 += localBufferPtr[15];

                localBufferPtr += 16;
                k -= 16;
            }

            // Remaining 0-15 bytes of this pass. k wraps after the final test,
            // which is harmless because it is re-initialized on the next pass.
            while (k-- > 0)
            {
                s2 += s1 += *localBufferPtr++;
            }

            s1 %= BASE;
            s2 %= BASE;
        }

        return (s2 << 16) | s1;
    }
}