Browse Source

Update Adler32.cs

pull/1574/head
James Jackson-South 6 years ago
parent
commit
4383c9283f
  1. 217
      src/ImageSharp/Formats/Png/Zlib/Adler32.cs

217
src/ImageSharp/Formats/Png/Zlib/Adler32.cs

@ -3,12 +3,12 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
#pragma warning disable IDE0007 // Use implicit type
namespace SixLabors.ImageSharp.Formats.Png.Zlib
{
/// <summary>
@ -22,16 +22,22 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib
/// </summary>
public const uint SeedValue = 1U;
// Largest prime smaller than 65536
private const uint BASE = 65521;

// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
private const uint NMAX = 5552;

#if SUPPORTS_RUNTIME_INTRINSICS
// NOTE(review): presumably the smallest input length routed to the intrinsics path;
// the check that uses it is outside this view, so confirm against the caller.
private const int MinBufferSize = 64;

// Per-byte multiplier weights for one 32-byte block, stored as two consecutive
// 16-byte rows. The intrinsics path loads the first row as tap1 and the row at
// offset 0x10 as tap2.
private static ReadOnlySpan<byte> Tap1Tap2 => new byte[]
{
    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, // tap1
    16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 // tap2
};
#endif
/// <summary>
/// Calculates the Adler32 checksum with the bytes taken from the span.
/// </summary>
@ -83,14 +89,15 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib
length -= blocks * BLOCK_SIZE;
int index = 0;
fixed (byte* bufferPtr = &buffer[0])
fixed (byte* bufferPtr = buffer)
fixed (byte* tapPtr = Tap1Tap2)
{
index += (int)blocks * BLOCK_SIZE;
var localBufferPtr = bufferPtr;
// _mm_setr_epi8 on x86
var tap1 = Vector128.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17);
var tap2 = Vector128.Create(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
Vector128<sbyte> tap1 = Sse2.LoadVector128((sbyte*)tapPtr);
Vector128<sbyte> tap2 = Sse2.LoadVector128((sbyte*)(tapPtr + 0x10));
Vector128<byte> zero = Vector128<byte>.Zero;
var ones = Vector128.Create((short)1);
@ -106,28 +113,28 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib
// Process n blocks of data. At most NMAX data bytes can be
// processed before s2 must be reduced modulo BASE.
Vector128<int> v_ps = Vector128.CreateScalar(s1 * n).AsInt32();
Vector128<int> v_s2 = Vector128.CreateScalar(s2).AsInt32();
Vector128<int> v_s1 = Vector128<int>.Zero;
Vector128<uint> v_ps = Vector128.CreateScalar(s1 * n);
Vector128<uint> v_s2 = Vector128.CreateScalar(s2);
Vector128<uint> v_s1 = Vector128<uint>.Zero;
do
{
// Load 32 input bytes.
Vector128<byte> bytes1 = Sse3.LoadDquVector128(localBufferPtr);
Vector128<byte> bytes2 = Sse3.LoadDquVector128(localBufferPtr + 16);
Vector128<byte> bytes2 = Sse3.LoadDquVector128(localBufferPtr + 0x10);
// Add previous block byte sum to v_ps.
v_ps = Sse2.Add(v_ps, v_s1);
// Horizontally add the bytes for s1, multiply-adds the
// bytes by [ 32, 31, 30, ... ] for s2.
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsInt32());
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsUInt32());
Vector128<short> mad1 = Ssse3.MultiplyAddAdjacent(bytes1, tap1);
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones));
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones).AsUInt32());
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsInt32());
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsUInt32());
Vector128<short> mad2 = Ssse3.MultiplyAddAdjacent(bytes2, tap2);
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones));
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones).AsUInt32());
localBufferPtr += BLOCK_SIZE;
}
@ -139,148 +146,114 @@ namespace SixLabors.ImageSharp.Formats.Png.Zlib
const byte S2301 = 0b1011_0001; // A B C D -> B A D C
const byte S1032 = 0b0100_1110; // A B C D -> C D A B
v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S2301));
v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S1032));
s1 += (uint)v_s1.ToScalar();
s1 += v_s1.ToScalar();
v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S2301));
v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S1032));
s2 = (uint)v_s2.ToScalar();
s2 = v_s2.ToScalar();
// Reduce.
s1 %= BASE;
s2 %= BASE;
}
}
ref byte bufferRef = ref MemoryMarshal.GetReference(buffer);
if (length > 0)
{
if (length >= 16)
if (length > 0)
{
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
s1 += Unsafe.Add(ref bufferRef, index++);
s2 += s1;
length -= 16;
}
if (length >= 16)
{
s2 += s1 += localBufferPtr[0];
s2 += s1 += localBufferPtr[1];
s2 += s1 += localBufferPtr[2];
s2 += s1 += localBufferPtr[3];
s2 += s1 += localBufferPtr[4];
s2 += s1 += localBufferPtr[5];
s2 += s1 += localBufferPtr[6];
s2 += s1 += localBufferPtr[7];
s2 += s1 += localBufferPtr[8];
s2 += s1 += localBufferPtr[9];
s2 += s1 += localBufferPtr[10];
s2 += s1 += localBufferPtr[11];
s2 += s1 += localBufferPtr[12];
s2 += s1 += localBufferPtr[13];
s2 += s1 += localBufferPtr[14];
s2 += s1 += localBufferPtr[15];
localBufferPtr += 16;
length -= 16;
}
while (length-- > 0)
{
s2 += s1 += Unsafe.Add(ref bufferRef, index++);
}
while (length-- > 0)
{
s2 += s1 += *localBufferPtr++;
}
if (s1 >= BASE)
{
s1 -= BASE;
if (s1 >= BASE)
{
s1 -= BASE;
}
s2 %= BASE;
}
s2 %= BASE;
return s1 | (s2 << 16);
}
return s1 | (s2 << 16);
}
#endif
/// <summary>
/// Calculates the Adler32 checksum of the given bytes using scalar (non-intrinsic) code.
/// </summary>
/// <param name="adler">
/// The checksum to continue from. Its low 16 bits seed the running byte sum (s1) and
/// its high 16 bits seed the running sum of sums (s2).
/// </param>
/// <param name="buffer">The bytes to fold into the checksum.</param>
/// <returns>The updated checksum, packed as <c>(s2 &lt;&lt; 16) | s1</c>.</returns>
[MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)]
private static unsafe uint CalculateScalar(uint adler, ReadOnlySpan<byte> buffer)
{
    uint s1 = adler & 0xFFFF;
    uint s2 = (adler >> 16) & 0xFFFF;
    uint k;

    fixed (byte* bufferPtr = buffer)
    {
        var localBufferPtr = bufferPtr;
        uint length = (uint)buffer.Length;

        while (length > 0)
        {
            // Consume at most NMAX bytes per pass so the sums can be reduced
            // modulo BASE before the 32-bit accumulators overflow.
            k = length < NMAX ? length : NMAX;
            length -= k;

            // Manually unrolled: 16 bytes per iteration.
            while (k >= 16)
            {
                s2 += s1 += localBufferPtr[0];
                s2 += s1 += localBufferPtr[1];
                s2 += s1 += localBufferPtr[2];
                s2 += s1 += localBufferPtr[3];
                s2 += s1 += localBufferPtr[4];
                s2 += s1 += localBufferPtr[5];
                s2 += s1 += localBufferPtr[6];
                s2 += s1 += localBufferPtr[7];
                s2 += s1 += localBufferPtr[8];
                s2 += s1 += localBufferPtr[9];
                s2 += s1 += localBufferPtr[10];
                s2 += s1 += localBufferPtr[11];
                s2 += s1 += localBufferPtr[12];
                s2 += s1 += localBufferPtr[13];
                s2 += s1 += localBufferPtr[14];
                s2 += s1 += localBufferPtr[15];

                localBufferPtr += 16;
                k -= 16;
            }

            // Remaining 0-15 bytes of this pass.
            while (k-- > 0)
            {
                s2 += s1 += *localBufferPtr++;
            }

            s1 %= BASE;
            s2 %= BASE;
        }

        return (s2 << 16) | s1;
    }
}
}
}

Loading…
Cancel
Save