From b8a76143cc5f0ea070f0b1072c85dd13d4555aa3 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 5 Feb 2023 22:58:13 +1000 Subject: [PATCH] Recreate @brianpopow 's changes --- src/ImageSharp/Compression/Zlib/Adler32.cs | 108 ++++++++++++++++-- src/ImageSharp/Compression/Zlib/Crc32.cs | 95 +++++++++++++++ .../Formats/Png/Filters/AverageFilter.cs | 34 +++++- .../Formats/Png/Filters/PaethFilter.cs | 79 +++++++++++++ .../Formats/Png/Filters/SubFilter.cs | 29 +++++ .../Formats/Png/Filters/UpFilter.cs | 46 +++++++- .../Formats/Png/Crc32Tests.cs | 36 ++++-- .../FeatureTesting/FeatureTestRunner.cs | 31 +++-- .../Tests/FeatureTestRunnerTests.cs | 56 ++++++++- 9 files changed, 476 insertions(+), 38 deletions(-) diff --git a/src/ImageSharp/Compression/Zlib/Adler32.cs b/src/ImageSharp/Compression/Zlib/Adler32.cs index 3885ef575..1b1a77715 100644 --- a/src/ImageSharp/Compression/Zlib/Adler32.cs +++ b/src/ImageSharp/Compression/Zlib/Adler32.cs @@ -4,6 +4,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; #pragma warning disable IDE0007 // Use implicit type @@ -95,7 +96,7 @@ internal static class Adler32 Vector128 tap1 = Sse2.LoadVector128((sbyte*)tapPtr); Vector128 tap2 = Sse2.LoadVector128((sbyte*)(tapPtr + 0x10)); Vector128 zero = Vector128.Zero; - var ones = Vector128.Create((short)1); + Vector128 ones = Vector128.Create((short)1); while (blocks > 0) { @@ -179,13 +180,13 @@ internal static class Adler32 byte* localBufferPtr = bufferPtr; Vector256 zero = Vector256.Zero; - var dot3v = Vector256.Create((short)1); - var dot2v = Vector256.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + Vector256 dot3v = Vector256.Create((short)1); + Vector256 dot2v = Vector256.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); // Process n blocks of data. At most NMAX data bytes can be // processed before s2 must be reduced modulo BASE. - var vs1 = Vector256.CreateScalar(s1); - var vs2 = Vector256.CreateScalar(s2); + Vector256 vs1 = Vector256.CreateScalar(s1); + Vector256 vs2 = Vector256.CreateScalar(s2); while (length >= 32) { @@ -243,6 +244,100 @@ internal static class Adler32 } } + // Based on: https://github.com/chromium/chromium/blob/master/third_party/zlib/adler32_simd.c + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] + private static unsafe uint CalculateArm(uint adler, ReadOnlySpan buffer) + { + // Split Adler-32 into component sums. + uint s1 = adler & 0xFFFF; + uint s2 = (adler >> 16) & 0xFFFF; + uint length = (uint)buffer.Length; + + // Process the data in blocks. + long blocks = length / BlockSize; + length -= (uint)(blocks * BlockSize); + fixed (byte* bufferPtr = &MemoryMarshal.GetReference(buffer)) + { + byte* localBufferPtr = bufferPtr; + + while (blocks != 0) + { + uint n = NMAX / BlockSize; + if (n > blocks) + { + n = (uint)blocks; + } + + blocks -= n; + + // Process n blocks of data. At most nMax data bytes can be + // processed before s2 must be reduced modulo Base. + Vector128 vs1 = Vector128.Zero; + Vector128 vs2 = vs1.WithElement(3, s1 * n); + Vector128 vColumnSum1 = Vector128.Zero; + Vector128 vColumnSum2 = Vector128.Zero; + Vector128 vColumnSum3 = Vector128.Zero; + Vector128 vColumnSum4 = Vector128.Zero; + + do + { + // Load 32 input bytes. + Vector128 bytes1 = AdvSimd.LoadVector128(localBufferPtr).AsUInt16(); + Vector128 bytes2 = AdvSimd.LoadVector128(localBufferPtr + 0x10).AsUInt16(); + + // Add previous block byte sum to v_s2. + vs2 = AdvSimd.Add(vs2, vs1); + + // Horizontally add the bytes for s1. + vs1 = AdvSimd.AddPairwiseWideningAndAdd( + vs1.AsUInt32(), + AdvSimd.AddPairwiseWideningAndAdd(AdvSimd.AddPairwiseWidening(bytes1.AsByte()).AsUInt16(), bytes2.AsByte())); + + // Vertically add the bytes for s2. + vColumnSum1 = AdvSimd.AddWideningLower(vColumnSum1, bytes1.GetLower().AsByte()); + vColumnSum2 = AdvSimd.AddWideningLower(vColumnSum2, bytes1.GetUpper().AsByte()); + vColumnSum3 = AdvSimd.AddWideningLower(vColumnSum3, bytes2.GetLower().AsByte()); + vColumnSum4 = AdvSimd.AddWideningLower(vColumnSum4, bytes2.GetUpper().AsByte()); + + localBufferPtr += BlockSize; + } + while (--n > 0); + + vs2 = AdvSimd.ShiftLeftLogical(vs2, 5); + + // Multiply-add bytes by [ 32, 31, 30, ... ] for s2. + vs2 = AdvSimd.MultiplyWideningLowerAndAdd(vs2, vColumnSum1.GetLower(), Vector64.Create((ushort)32, 31, 30, 29)); + vs2 = AdvSimd.MultiplyWideningLowerAndAdd(vs2, vColumnSum1.GetUpper(), Vector64.Create((ushort)28, 27, 26, 25)); + vs2 = AdvSimd.MultiplyWideningLowerAndAdd(vs2, vColumnSum2.GetLower(), Vector64.Create((ushort)24, 23, 22, 21)); + vs2 = AdvSimd.MultiplyWideningLowerAndAdd(vs2, vColumnSum2.GetUpper(), Vector64.Create((ushort)20, 19, 18, 17)); + vs2 = AdvSimd.MultiplyWideningLowerAndAdd(vs2, vColumnSum3.GetLower(), Vector64.Create((ushort)16, 15, 14, 13)); + vs2 = AdvSimd.MultiplyWideningLowerAndAdd(vs2, vColumnSum3.GetUpper(), Vector64.Create((ushort)12, 11, 10, 9)); + vs2 = AdvSimd.MultiplyWideningLowerAndAdd(vs2, vColumnSum4.GetLower(), Vector64.Create((ushort)8, 7, 6, 5)); + vs2 = AdvSimd.MultiplyWideningLowerAndAdd(vs2, vColumnSum4.GetUpper(), Vector64.Create((ushort)4, 3, 2, 1)); + + // Sum epi32 ints v_s1(s2) and accumulate in s1(s2). + Vector64 sum1 = AdvSimd.AddPairwise(vs1.GetLower(), vs1.GetUpper()); + Vector64 sum2 = AdvSimd.AddPairwise(vs2.GetLower(), vs2.GetUpper()); + Vector64 s1s2 = AdvSimd.AddPairwise(sum1, sum2); + + // Store the results. + s1 += AdvSimd.Extract(s1s2, 0); + s2 += AdvSimd.Extract(s1s2, 1); + + // Reduce. + s1 %= BASE; + s2 %= BASE; + } + + if (length > 0) + { + HandleLeftOver(localBufferPtr, length, ref s1, ref s2); + } + + return s1 | (s2 << 16); + } + } + private static unsafe void HandleLeftOver(byte* localBufferPtr, uint length, ref uint s1, ref uint s2) { if (length >= 16) @@ -286,7 +381,6 @@ internal static class Adler32 { uint s1 = adler & 0xFFFF; uint s2 = (adler >> 16) & 0xFFFF; - uint k; fixed (byte* bufferPtr = buffer) { @@ -295,7 +389,7 @@ internal static class Adler32 while (length > 0) { - k = length < NMAX ? length : NMAX; + uint k = length < NMAX ? length : NMAX; length -= k; while (k >= 16) diff --git a/src/ImageSharp/Compression/Zlib/Crc32.cs b/src/ImageSharp/Compression/Zlib/Crc32.cs index b8665bd43..c85d58df5 100644 --- a/src/ImageSharp/Compression/Zlib/Crc32.cs +++ b/src/ImageSharp/Compression/Zlib/Crc32.cs @@ -5,6 +5,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; +using ArmCrc32 = System.Runtime.Intrinsics.Arm.Crc32; namespace SixLabors.ImageSharp.Compression.Zlib; @@ -187,6 +188,100 @@ internal static partial class Crc32 } } + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] + private static unsafe uint CalculateArm(uint crc, ReadOnlySpan buffer) + { + fixed (byte* bufferPtr = buffer) + { + byte* localBufferPtr = bufferPtr; + int len = buffer.Length; + + while (len > 0 && ((ulong)localBufferPtr & 3) != 0) + { + crc = ArmCrc32.ComputeCrc32(crc, *localBufferPtr++); + len--; + } + + uint* intBufferPtr = (uint*)localBufferPtr; + + while (len >= 8 * sizeof(uint)) + { + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + len -= 8 * sizeof(uint); + } + + while (len >= sizeof(uint)) + { + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + len -= sizeof(uint); + } + + localBufferPtr = (byte*)intBufferPtr; + + while (len > 0) + { + crc = ArmCrc32.ComputeCrc32(crc, *localBufferPtr++); + len--; + } + + return crc; + } + } + + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] + private static unsafe uint CalculateArm64(uint crc, ReadOnlySpan buffer) + { + fixed (byte* bufferPtr = buffer) + { + byte* localBufferPtr = bufferPtr; + int len = buffer.Length; + + while (len > 0 && ((ulong)localBufferPtr & 7) != 0) + { + crc = ArmCrc32.ComputeCrc32(crc, *localBufferPtr++); + len--; + } + + ulong* longBufferPtr = (ulong*)localBufferPtr; + + while (len >= 8 * sizeof(ulong)) + { + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + len -= 8 * sizeof(ulong); + } + + while (len >= sizeof(ulong)) + { + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + len -= sizeof(ulong); + } + + localBufferPtr = (byte*)longBufferPtr; + + while (len > 0) + { + crc = ArmCrc32.ComputeCrc32(crc, *localBufferPtr++); + len--; + } + + return crc; + } + } + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] private static uint CalculateScalar(uint crc, ReadOnlySpan buffer) { diff --git a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs index 7a6ea13ca..34e05d779 100644 --- a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs @@ -4,6 +4,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.Formats.Png.Filters; @@ -35,6 +36,10 @@ internal static class AverageFilter { DecodeSse2(scanline, previousScanline); } + else if (AdvSimd.IsSupported && bytesPerPixel is 4) + { + DecodeArm(scanline, previousScanline); + } else { DecodeScalar(scanline, previousScanline, bytesPerPixel); @@ -48,7 +53,7 @@ internal static class AverageFilter ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); Vector128 d = Vector128.Zero; - var ones = Vector128.Create((byte)1); + Vector128 ones = Vector128.Create((byte)1); int rb = scanline.Length; nint offset = 1; @@ -75,6 +80,33 @@ internal static class AverageFilter } } + public static void DecodeArm(Span scanline, Span previousScanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); + + Vector64 d = Vector64.Zero; + + int rb = scanline.Length; + int offset = 1; + const int bytesPerBatch = 4; + while (rb >= bytesPerBatch) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + Vector64 a = d; + Vector64 b = Vector64.CreateScalar(Unsafe.As(ref Unsafe.Add(ref prevBaseRef, offset))).AsByte(); + d = Vector64.CreateScalar(Unsafe.As(ref scanRef)).AsByte(); + + Vector64 avg = AdvSimd.FusedAddHalving(a, b); + d = AdvSimd.Add(d, avg); + + Unsafe.As(ref scanRef) = d.AsInt32().ToScalar(); + + rb -= bytesPerBatch; + offset += bytesPerBatch; + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void DecodeScalar(Span scanline, Span previousScanline, int bytesPerPixel) { diff --git a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs index aee68487d..8998d6bc0 100644 --- a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs @@ -5,6 +5,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.Formats.Png.Filters; @@ -38,6 +39,10 @@ internal static class PaethFilter { DecodeSse41(scanline, previousScanline); } + else if (AdvSimd.Arm64.IsSupported && bytesPerPixel is 4) + { + DecodeArm(scanline, previousScanline); + } else { DecodeScalar(scanline, previousScanline, bytesPerPixel); @@ -99,6 +104,80 @@ internal static class PaethFilter } } + public static void DecodeArm(Span scanline, Span previousScanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); + + Vector128 b = Vector128.Zero; + Vector128 d = Vector128.Zero; + + int rb = scanline.Length; + nint offset = 1; + const int bytesPerBatch = 4; + while (rb >= bytesPerBatch) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + Vector128 c = b; + Vector128 a = d; + b = AdvSimd.Arm64.ZipLow( + Vector128.CreateScalar(Unsafe.As(ref Unsafe.Add(ref prevBaseRef, offset))).AsByte(), + Vector128.Zero).AsByte(); + d = AdvSimd.Arm64.ZipLow( + Vector128.CreateScalar(Unsafe.As(ref scanRef)).AsByte(), + Vector128.Zero).AsByte(); + + // (p-a) == (a+b-c - a) == (b-c) + Vector128 pa = AdvSimd.Subtract(b.AsInt16(), c.AsInt16()); + + // (p-b) == (a+b-c - b) == (a-c) + Vector128 pb = AdvSimd.Subtract(a.AsInt16(), c.AsInt16()); + + // (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) + Vector128 pc = AdvSimd.Add(pa.AsInt16(), pb.AsInt16()); + + pa = AdvSimd.Abs(pa.AsInt16()).AsInt16(); /* |p-a| */ + pb = AdvSimd.Abs(pb.AsInt16()).AsInt16(); /* |p-b| */ + pc = AdvSimd.Abs(pc.AsInt16()).AsInt16(); /* |p-c| */ + + Vector128 smallest = AdvSimd.Min(pc, AdvSimd.Min(pa, pb)); + + // Paeth breaks ties favoring a over b over c. + Vector128 mask = BlendVariable(c, b, AdvSimd.CompareEqual(smallest, pb).AsByte()); + Vector128 nearest = BlendVariable(mask, a, AdvSimd.CompareEqual(smallest, pa).AsByte()); + + d = AdvSimd.Add(d, nearest); + + Vector64 e = AdvSimd.ExtractNarrowingSaturateUnsignedLower(d.AsInt16()); + + Unsafe.As(ref scanRef) = Vector128.Create(e, e).AsInt32().ToScalar(); + + rb -= bytesPerBatch; + offset += bytesPerBatch; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector128 BlendVariable(Vector128 a, Vector128 b, Vector128 c) + { + // Equivalent of Sse41.BlendVariable: + // Blend packed 8-bit integers from a and b using mask, and store the results in + // dst. + // + // FOR j := 0 to 15 + // i := j*8 + // IF mask[i+7] + // dst[i+7:i] := b[i+7:i] + // ELSE + // dst[i+7:i] := a[i+7:i] + // FI + // ENDFOR + // + // Use a signed shift right to create a mask with the sign bit. + Vector128 mask = AdvSimd.ShiftRightArithmetic(c.AsInt16(), 7); + return AdvSimd.BitwiseSelect(mask, b.AsInt16(), a.AsInt16()).AsByte(); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void DecodeScalar(Span scanline, Span previousScanline, int bytesPerPixel) { diff --git a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs index dcf55237d..0f4aa3fcf 100644 --- a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs @@ -5,6 +5,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.Formats.Png.Filters; @@ -29,6 +30,10 @@ internal static class SubFilter { DecodeSse2(scanline); } + else if (AdvSimd.IsSupported && bytesPerPixel is 4) + { + DecodeArm(scanline); + } else { DecodeScalar(scanline, bytesPerPixel); @@ -58,6 +63,30 @@ internal static class SubFilter } } + public static void DecodeArm(Span scanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + + Vector64 d = Vector64.Zero; + + int rb = scanline.Length; + int offset = 1; + const int bytesPerBatch = 4; + while (rb >= bytesPerBatch) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + Vector64 a = d; + d = Vector64.CreateScalar(Unsafe.As(ref scanRef)).AsByte(); + + d = AdvSimd.Add(d, a); + + Unsafe.As(ref scanRef) = d.AsInt32().ToScalar(); + + rb -= bytesPerBatch; + offset += bytesPerBatch; + } + } + private static void DecodeScalar(Span scanline, int bytesPerPixel) { ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); diff --git a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs index 833563655..f90b129b6 100644 --- a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs @@ -5,6 +5,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp.Formats.Png.Filters; @@ -34,6 +35,10 @@ internal static class UpFilter { DecodeSse2(scanline, previousScanline); } + else if (AdvSimd.IsSupported) + { + DecodeArm(scanline, previousScanline); + } else { DecodeScalar(scanline, previousScanline); @@ -51,11 +56,10 @@ internal static class UpFilter while (rb >= Vector256.Count) { ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); - Vector256 current = Unsafe.As>(ref scanRef); + Vector256 prior = Unsafe.As>(ref scanRef); Vector256 up = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, offset)); - Vector256 sum = Avx2.Add(up, current); - Unsafe.As>(ref scanRef) = sum; + Unsafe.As>(ref scanRef) = Avx2.Add(up, prior); offset += Vector256.Count; rb -= Vector256.Count; @@ -82,11 +86,10 @@ internal static class UpFilter while (rb >= Vector128.Count) { ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); - Vector128 current = Unsafe.As>(ref scanRef); + Vector128 prior = Unsafe.As>(ref scanRef); Vector128 up = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, offset)); - Vector128 sum = Sse2.Add(up, current); - Unsafe.As>(ref scanRef) = sum; + Unsafe.As>(ref scanRef) = Sse2.Add(up, prior); offset += Vector128.Count; rb -= Vector128.Count; @@ -102,6 +105,37 @@ internal static class UpFilter } } + private static void DecodeArm(Span scanline, Span previousScanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); + + // Up(x) + Prior(x) + int rb = scanline.Length; + nint offset = 1; + const int bytesPerBatch = 16; + while (rb >= bytesPerBatch) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + Vector128 prior = Unsafe.As>(ref scanRef); + Vector128 up = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, offset)); + + Unsafe.As>(ref scanRef) = AdvSimd.Add(prior, up); + + offset += bytesPerBatch; + rb -= bytesPerBatch; + } + + // Handle left over. + for (nint i = offset; i < scanline.Length; i++) + { + ref byte scan = ref Unsafe.Add(ref scanBaseRef, offset); + byte above = Unsafe.Add(ref prevBaseRef, offset); + scan = (byte)(scan + above); + offset++; + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void DecodeScalar(Span scanline, Span previousScanline) { diff --git a/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs b/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs index 0dea05c53..ff91590f9 100644 --- a/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs @@ -2,6 +2,7 @@ // Licensed under the Six Labors Split License. using SixLabors.ImageSharp.Compression.Zlib; +using SixLabors.ImageSharp.Tests.TestUtilities; using SharpCrc32 = ICSharpCode.SharpZipLib.Checksum.Crc32; namespace SixLabors.ImageSharp.Tests.Formats.Png; @@ -13,10 +14,7 @@ public class Crc32Tests [InlineData(0)] [InlineData(1)] [InlineData(2)] - public void ReturnsCorrectWhenEmpty(uint input) - { - Assert.Equal(input, Crc32.Calculate(input, default)); - } + public void CalculateCrc_ReturnsCorrectResultWhenEmpty(uint input) => Assert.Equal(input, Crc32.Calculate(input, default)); [Theory] [InlineData(0)] @@ -26,23 +24,43 @@ public class Crc32Tests [InlineData(1024 + 15)] [InlineData(2034)] [InlineData(4096)] - public void MatchesReference(int length) + public void CalculateCrc_MatchesReference(int length) => CalculateCrcAndCompareToReference(length); + + private static void CalculateCrcAndCompareToReference(int length) { - var data = GetBuffer(length); - var crc = new SharpCrc32(); + // arrange + byte[] data = GetBuffer(length); + SharpCrc32 crc = new(); crc.Update(data); - long expected = crc.Value; + + // act long actual = Crc32.Calculate(data); + // assert Assert.Equal(expected, actual); } private static byte[] GetBuffer(int length) { - var data = new byte[length]; + byte[] data = new byte[length]; new Random(1).NextBytes(data); return data; } + + [Fact] + public void RunCalculateCrcTest_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCalculateCrcTest, HwIntrinsics.AllowAll); + + [Fact] + public void RunCalculateCrcTest_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCalculateCrcTest, HwIntrinsics.DisableHWIntrinsic); + + private static void RunCalculateCrcTest() + { + int[] testData = { 0, 8, 215, 1024, 1024 + 15, 2034, 4096 }; + for (int i = 0; i < testData.Length; i++) + { + CalculateCrcAndCompareToReference(testData[i]); + } + } } diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index ad5aa4769..1bb64d99d 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -12,7 +12,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities; /// public static class FeatureTestRunner { - private static readonly char[] SplitChars = new[] { ',', ' ' }; + private static readonly char[] SplitChars = { ',', ' ' }; /// /// Allows the deserialization of parameters passed to the feature test. @@ -35,6 +35,7 @@ public static class FeatureTestRunner /// Allows the deserialization of types implementing /// passed to the feature test. /// + /// The type of object to deserialize. /// The string value to deserialize. /// The value. public static T Deserialize(string value) @@ -58,7 +59,7 @@ public static class FeatureTestRunner foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) { - var processStartInfo = new ProcessStartInfo(); + ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; @@ -99,7 +100,7 @@ public static class FeatureTestRunner foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) { - var processStartInfo = new ProcessStartInfo(); + ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; @@ -142,7 +143,7 @@ public static class FeatureTestRunner foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) { - var processStartInfo = new ProcessStartInfo(); + ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; @@ -185,7 +186,7 @@ public static class FeatureTestRunner foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) { - var processStartInfo = new ProcessStartInfo(); + ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; @@ -232,7 +233,7 @@ public static class FeatureTestRunner foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) { - var processStartInfo = new ProcessStartInfo(); + ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; @@ -276,7 +277,7 @@ public static class FeatureTestRunner foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) { - var processStartInfo = new ProcessStartInfo(); + ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; @@ -321,7 +322,7 @@ public static class FeatureTestRunner foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) { - var processStartInfo = new ProcessStartInfo(); + ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; @@ -347,11 +348,11 @@ public static class FeatureTestRunner internal static Dictionary ToFeatureKeyValueCollection(this HwIntrinsics intrinsics) { - // Loop through and translate the given values into COMPlus equivaluents - var features = new Dictionary(); + // Loop through and translate the given values into COMPlus equivalents + Dictionary features = new(); foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries)) { - var key = (HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic); + HwIntrinsics key = (HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic); switch (intrinsic) { case nameof(HwIntrinsics.AllowAll): @@ -400,5 +401,11 @@ public enum HwIntrinsics DisableBMI1 = 1 << 13, DisableBMI2 = 1 << 14, DisableLZCNT = 1 << 15, - AllowAll = 1 << 16 + DisableArm64AdvSimd = 1 << 16, + DisableArm64Crc32 = 1 << 17, + DisableArm64Dp = 1 << 18, + DisableArm64Aes = 1 << 19, + DisableArm64Sha1 = 1 << 20, + DisableArm64Sha256 = 1 << 21, + AllowAll = 1 << 22 } diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 6ce07e766..34337600e 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -2,8 +2,10 @@ // Licensed under the Six Labors Split License. using System.Numerics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; using Xunit.Abstractions; +using Aes = System.Runtime.Intrinsics.X86.Aes; namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests; @@ -12,9 +14,9 @@ public class FeatureTestRunnerTests public static TheoryData Intrinsics => new() { - { HwIntrinsics.DisableAES | HwIntrinsics.AllowAll, new string[] { "EnableAES", "AllowAll" } }, - { HwIntrinsics.DisableHWIntrinsic, new string[] { "EnableHWIntrinsic" } }, - { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new string[] { "EnableSSE42", "EnableAVX" } } + { HwIntrinsics.DisableAES | HwIntrinsics.AllowAll, new[] { "EnableAES", "AllowAll" } }, + { HwIntrinsics.DisableHWIntrinsic, new[] { "EnableHWIntrinsic" } }, + { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new[] { "EnableSSE42", "EnableAVX" } } }; [Theory] @@ -101,6 +103,12 @@ public class FeatureTestRunnerTests Assert.False(Bmi1.IsSupported); Assert.False(Bmi2.IsSupported); Assert.False(Lzcnt.IsSupported); + Assert.False(AdvSimd.IsSupported); + Assert.False(System.Runtime.Intrinsics.Arm.Aes.IsSupported); + Assert.False(Crc32.IsSupported); + Assert.False(Dp.IsSupported); + Assert.False(Sha1.IsSupported); + Assert.False(Sha256.IsSupported); break; case HwIntrinsics.DisableSSE: Assert.False(Sse.IsSupported); @@ -147,6 +155,24 @@ public class FeatureTestRunnerTests case HwIntrinsics.DisableLZCNT: Assert.False(Lzcnt.IsSupported); break; + case HwIntrinsics.DisableArm64AdvSimd: + Assert.False(AdvSimd.IsSupported); + break; + case HwIntrinsics.DisableArm64Aes: + Assert.False(System.Runtime.Intrinsics.Arm.Aes.IsSupported); + break; + case HwIntrinsics.DisableArm64Crc32: + Assert.False(Crc32.IsSupported); + break; + case HwIntrinsics.DisableArm64Dp: + Assert.False(Dp.IsSupported); + break; + case HwIntrinsics.DisableArm64Sha1: + Assert.False(Sha1.IsSupported); + break; + case HwIntrinsics.DisableArm64Sha256: + Assert.False(Sha256.IsSupported); + break; } } @@ -198,6 +224,12 @@ public class FeatureTestRunnerTests Assert.False(Bmi1.IsSupported); Assert.False(Bmi2.IsSupported); Assert.False(Lzcnt.IsSupported); + Assert.False(AdvSimd.IsSupported); + Assert.False(System.Runtime.Intrinsics.Arm.Aes.IsSupported); + Assert.False(Crc32.IsSupported); + Assert.False(Dp.IsSupported); + Assert.False(Sha1.IsSupported); + Assert.False(Sha256.IsSupported); break; case HwIntrinsics.DisableSSE: Assert.False(Sse.IsSupported); @@ -244,6 +276,24 @@ public class FeatureTestRunnerTests case HwIntrinsics.DisableLZCNT: Assert.False(Lzcnt.IsSupported); break; + case HwIntrinsics.DisableArm64AdvSimd: + Assert.False(AdvSimd.IsSupported); + break; + case HwIntrinsics.DisableArm64Aes: + Assert.False(System.Runtime.Intrinsics.Arm.Aes.IsSupported); + break; + case HwIntrinsics.DisableArm64Crc32: + Assert.False(Crc32.IsSupported); + break; + case HwIntrinsics.DisableArm64Dp: + Assert.False(Dp.IsSupported); + break; + case HwIntrinsics.DisableArm64Sha1: + Assert.False(Sha1.IsSupported); + break; + case HwIntrinsics.DisableArm64Sha256: + Assert.False(Sha256.IsSupported); + break; } }