diff --git a/src/ImageSharp/Formats/Png/Zlib/Adler32.cs b/src/ImageSharp/Formats/Png/Zlib/Adler32.cs
index bd354a508..dc8b7ad0d 100644
--- a/src/ImageSharp/Formats/Png/Zlib/Adler32.cs
+++ b/src/ImageSharp/Formats/Png/Zlib/Adler32.cs
@@ -3,12 +3,12 @@
using System;
using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
+#pragma warning disable IDE0007 // Use implicit type
namespace SixLabors.ImageSharp.Formats.Png.Zlib
{
///
@@ -22,16 +22,22 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib
///
public const uint SeedValue = 1U;
-#if SUPPORTS_RUNTIME_INTRINSICS
- private const int MinBufferSize = 64;
-#endif
-
// Largest prime smaller than 65536
private const uint BASE = 65521;
// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
private const uint NMAX = 5552;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private const int MinBufferSize = 64;
+
+ // Per-byte multipliers for one 32-byte SIMD block, stored back to back so that
+ // tap1 can be loaded from offset 0 and tap2 from offset 16 of the same pinned span.
+ // Returning a constant byte[] as ReadOnlySpan<byte> lets the compiler reference data
+ // embedded in the assembly instead of allocating a new array on every access.
+ private static ReadOnlySpan<byte> Tap1Tap2 => new byte[]
+ {
+ 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, // tap1: weights for block bytes 0..15
+ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 // tap2: weights for block bytes 16..31
+ };
+#endif
+
/// <summary>
/// Calculates the Adler32 checksum with the bytes taken from the span.
/// </summary>
@@ -83,14 +89,15 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib
length -= blocks * BLOCK_SIZE;
int index = 0;
- fixed (byte* bufferPtr = &buffer[0])
+ fixed (byte* bufferPtr = buffer)
+ fixed (byte* tapPtr = Tap1Tap2)
{
index += (int)blocks * BLOCK_SIZE;
var localBufferPtr = bufferPtr;
// _mm_setr_epi8 on x86
- var tap1 = Vector128.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17);
- var tap2 = Vector128.Create(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
+ Vector128 tap1 = Sse2.LoadVector128((sbyte*)tapPtr);
+ Vector128 tap2 = Sse2.LoadVector128((sbyte*)(tapPtr + 0x10));
Vector128 zero = Vector128.Zero;
var ones = Vector128.Create((short)1);
@@ -106,28 +113,28 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib
// Process n blocks of data. At most NMAX data bytes can be
// processed before s2 must be reduced modulo BASE.
- Vector128 v_ps = Vector128.CreateScalar(s1 * n).AsInt32();
- Vector128 v_s2 = Vector128.CreateScalar(s2).AsInt32();
- Vector128 v_s1 = Vector128.Zero;
+ Vector128 v_ps = Vector128.CreateScalar(s1 * n);
+ Vector128 v_s2 = Vector128.CreateScalar(s2);
+ Vector128 v_s1 = Vector128.Zero;
do
{
// Load 32 input bytes.
Vector128 bytes1 = Sse3.LoadDquVector128(localBufferPtr);
- Vector128 bytes2 = Sse3.LoadDquVector128(localBufferPtr + 16);
+ Vector128 bytes2 = Sse3.LoadDquVector128(localBufferPtr + 0x10);
// Add previous block byte sum to v_ps.
v_ps = Sse2.Add(v_ps, v_s1);
// Horizontally add the bytes for s1, multiply-adds the
// bytes by [ 32, 31, 30, ... ] for s2.
- v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsInt32());
+ v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsUInt32());
Vector128 mad1 = Ssse3.MultiplyAddAdjacent(bytes1, tap1);
- v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones));
+ v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones).AsUInt32());
- v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsInt32());
+ v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsUInt32());
Vector128 mad2 = Ssse3.MultiplyAddAdjacent(bytes2, tap2);
- v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones));
+ v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones).AsUInt32());
localBufferPtr += BLOCK_SIZE;
}
@@ -139,148 +146,114 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib
const byte S2301 = 0b1011_0001; // A B C D -> B A D C
const byte S1032 = 0b0100_1110; // A B C D -> C D A B
- v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S2301));
v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S1032));
- s1 += (uint)v_s1.ToScalar();
+ s1 += v_s1.ToScalar();
v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S2301));
v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S1032));
- s2 = (uint)v_s2.ToScalar();
+ s2 = v_s2.ToScalar();
// Reduce.
s1 %= BASE;
s2 %= BASE;
}
- }
-
- ref byte bufferRef = ref MemoryMarshal.GetReference(buffer);
- if (length > 0)
- {
- if (length >= 16)
+ if (length > 0)
{
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- length -= 16;
- }
+ if (length >= 16)
+ {
+ s2 += s1 += localBufferPtr[0];
+ s2 += s1 += localBufferPtr[1];
+ s2 += s1 += localBufferPtr[2];
+ s2 += s1 += localBufferPtr[3];
+ s2 += s1 += localBufferPtr[4];
+ s2 += s1 += localBufferPtr[5];
+ s2 += s1 += localBufferPtr[6];
+ s2 += s1 += localBufferPtr[7];
+ s2 += s1 += localBufferPtr[8];
+ s2 += s1 += localBufferPtr[9];
+ s2 += s1 += localBufferPtr[10];
+ s2 += s1 += localBufferPtr[11];
+ s2 += s1 += localBufferPtr[12];
+ s2 += s1 += localBufferPtr[13];
+ s2 += s1 += localBufferPtr[14];
+ s2 += s1 += localBufferPtr[15];
+
+ localBufferPtr += 16;
+ length -= 16;
+ }
- while (length-- > 0)
- {
- s2 += s1 += Unsafe.Add(ref bufferRef, index++);
- }
+ while (length-- > 0)
+ {
+ s2 += s1 += *localBufferPtr++;
+ }
- if (s1 >= BASE)
- {
- s1 -= BASE;
+ if (s1 >= BASE)
+ {
+ s1 -= BASE;
+ }
+
+ s2 %= BASE;
}
- s2 %= BASE;
+ return s1 | (s2 << 16);
}
-
- return s1 | (s2 << 16);
}
#endif
+ /// <summary>
+ /// Scalar Adler-32 update: folds every byte of <paramref name="buffer"/> into the running checksum.
+ /// </summary>
+ /// <param name="adler">Running checksum; bits 0-15 hold s1 and bits 16-31 hold s2.</param>
+ /// <param name="buffer">The bytes to add to the checksum.</param>
+ /// <returns>The updated checksum, packed as (s2 &lt;&lt; 16) | s1.</returns>
[MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)]
- private static uint CalculateScalar(uint adler, ReadOnlySpan<byte> buffer)
+ private static unsafe uint CalculateScalar(uint adler, ReadOnlySpan<byte> buffer)
{
uint s1 = adler & 0xFFFF;
uint s2 = (adler >> 16) & 0xFFFF;
uint k;
- ref byte bufferRef = ref MemoryMarshal.GetReference(buffer);
- uint length = (uint)buffer.Length;
- int index = 0;
-
- while (length > 0)
+ // Pin the span so its bytes can be walked with a raw pointer.
+ fixed (byte* bufferPtr = buffer)
{
- k = length < NMAX ? length : NMAX;
- length -= k;
+ var localBufferPtr = bufferPtr;
+ uint length = (uint)buffer.Length;
- while (k >= 16)
+ while (length > 0)
{
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
- k -= 16;
- }
+ // Consume at most NMAX bytes per round so the uint sums cannot overflow
+ // before the modulo at the end of the round.
+ k = length < NMAX ? length : NMAX;
+ length -= k;
- if (k != 0)
- {
- do
+ while (k >= 16)
+ {
+ s2 += s1 += localBufferPtr[0];
+ s2 += s1 += localBufferPtr[1];
+ s2 += s1 += localBufferPtr[2];
+ s2 += s1 += localBufferPtr[3];
+ s2 += s1 += localBufferPtr[4];
+ s2 += s1 += localBufferPtr[5];
+ s2 += s1 += localBufferPtr[6];
+ s2 += s1 += localBufferPtr[7];
+ s2 += s1 += localBufferPtr[8];
+ s2 += s1 += localBufferPtr[9];
+ s2 += s1 += localBufferPtr[10];
+ s2 += s1 += localBufferPtr[11];
+ s2 += s1 += localBufferPtr[12];
+ s2 += s1 += localBufferPtr[13];
+ s2 += s1 += localBufferPtr[14];
+ s2 += s1 += localBufferPtr[15];
+
+ localBufferPtr += 16;
+ k -= 16;
+ }
+
+ // Remaining 0..15 bytes of this round. k wraps to uint.MaxValue when the loop
+ // exits, which is harmless because it is reassigned before it is read again.
+ while (k-- > 0)
{
- s1 += Unsafe.Add(ref bufferRef, index++);
- s2 += s1;
+ s2 += s1 += *localBufferPtr++;
}
- while (--k != 0);
+
+ s1 %= BASE;
+ s2 %= BASE;
}
- s1 %= BASE;
- s2 %= BASE;
+ return (s2 << 16) | s1;
}
-
- return (s2 << 16) | s1;
}
}
}