From 6ae2eb9eb208681a20bbcfbc8932fea5feae60f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Foidl?= Date: Mon, 13 Mar 2023 13:17:26 +0100 Subject: [PATCH] Used unsigned division for vector sizes to get better codegen Cf. https://sharplab.io/#v2:EYLgxg9gTgpgtADwGwBYA0AXEBDAzgWwB8ABAJgEYBYAKGIAYACY8gOgCUBXAOwwEt8YLAJI8ovLrl5hcAbho1iAZiakGAYQYBvGg11NlXcRgYBZcgAojDADYwuAcwwALAJQMAvAD4bdx04YA9AwAajBgGNDkpAAcADwAZtYQ2BieLGoQ3Bhy1Hr6DIY8pqSWRbYOzm5eDOaFGC7mHEYu5X6BIWERUFFxicmp6Zk8OQC+QA= --- src/ImageSharp/Common/Helpers/Numerics.cs | 12 ++- .../Helpers/SimdUtils.ExtendedIntrinsics.cs | 8 +- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 84 +++++++++---------- .../Formats/Jpeg/Components/Block8x8F.cs | 4 +- .../JpegColorConverter.CmykAvx.cs | 4 +- .../JpegColorConverter.CmykVector.cs | 4 +- .../JpegColorConverter.GrayScaleAvx.cs | 4 +- .../JpegColorConverter.GrayScaleVector.cs | 4 +- .../JpegColorConverter.RgbAvx.cs | 2 +- .../JpegColorConverter.RgbVector.cs | 2 +- .../JpegColorConverter.YCbCrAvx.cs | 4 +- .../JpegColorConverter.YCbCrVector.cs | 4 +- .../JpegColorConverter.YccKAvx.cs | 4 +- .../JpegColorConverter.YccKVector.cs | 4 +- .../Components/Encoder/ComponentProcessor.cs | 8 +- .../ImageSharp.Benchmarks/Bulk/FromVector4.cs | 4 +- .../Bulk/ToVector4_Rgba32.cs | 12 +-- .../PixelConversion_PackFromRgbPlanes.cs | 4 +- .../General/Vectorization/UInt32ToSingle.cs | 16 ++-- .../General/Vectorization/VectorFetching.cs | 18 ++-- .../Vectorization/WidenBytesToUInt32.cs | 4 +- 21 files changed, 108 insertions(+), 102 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 7ba60cfe5..6fbd48f8c 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -55,6 +55,12 @@ internal static class Numerics [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Modulo4(int x) => x & 3; + /// <summary> + /// Calculates <paramref name="x"/> % 4 + /// </summary> + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public static nint Modulo4(nint x) => x & 3; + /// /// Calculates % 8 /// @@ -430,9 +436,9 @@ internal static class Numerics var vmin = new Vector(min); var vmax = new Vector(max); - int n = span.Length / Vector.Count; - int m = Modulo4(n); - int u = n - m; + nint n = (nint)(uint)span.Length / Vector.Count; + nint m = Modulo4(n); + nint u = n - m; ref Vector vs0 = ref Unsafe.As>(ref MemoryMarshal.GetReference(span)); ref Vector vs1 = ref Unsafe.Add(ref vs0, 1); diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs index 2014a2a35..9d2da7dc8 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs @@ -97,12 +97,12 @@ internal static partial class SimdUtils { VerifySpanInput(source, dest, Vector.Count); - int n = dest.Length / Vector.Count; + nint n = (nint)(uint)dest.Length / Vector.Count; ref Vector sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref Vector destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { Vector b = Unsafe.Add(ref sourceBase, i); @@ -132,13 +132,13 @@ internal static partial class SimdUtils { VerifySpanInput(source, dest, Vector.Count); - int n = dest.Length / Vector.Count; + nint n = (nint)(uint)dest.Length / Vector.Count; ref Vector sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref Vector destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { ref Vector s = ref Unsafe.Add(ref sourceBase, i * 4); diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 3841b64b4..a82b5559c 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ 
b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -221,11 +221,11 @@ internal static partial class SimdUtils ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - int n = dest.Length / Vector256.Count; - int m = Numerics.Modulo4(n); - int u = n - m; + nint n = (nint)(uint)dest.Length / Vector256.Count; + nint m = Numerics.Modulo4(n); + nint u = n - m; - for (int i = 0; i < u; i += 4) + for (nint i = 0; i < u; i += 4) { ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); @@ -238,7 +238,7 @@ internal static partial class SimdUtils if (m > 0) { - for (int i = u; i < n; i++) + for (nint i = u; i < n; i++) { Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control); } @@ -253,11 +253,11 @@ internal static partial class SimdUtils ref Vector128 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - int n = dest.Length / Vector128.Count; - int m = Numerics.Modulo4(n); - int u = n - m; + nint n = (nint)(uint)dest.Length / Vector128.Count; + nint m = Numerics.Modulo4(n); + nint u = n - m; - for (int i = 0; i < u; i += 4) + for (nint i = 0; i < u; i += 4) { ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); @@ -276,7 +276,7 @@ internal static partial class SimdUtils if (m > 0) { - for (int i = u; i < n; i++) + for (nint i = u; i < n; i++) { Vector128 vs = Unsafe.Add(ref sourceBase, i); Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control); @@ -306,11 +306,11 @@ internal static partial class SimdUtils ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - int n = dest.Length / Vector256.Count; - int m = Numerics.Modulo4(n); - int u = n - m; + nint n = (nint)(uint)dest.Length / Vector256.Count; + nint m = Numerics.Modulo4(n); + nint u = n - m; - for (int i = 0; i < u; i += 4) + for (nint i = 0; i < u; i += 4) { ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, 
i); ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); @@ -323,7 +323,7 @@ internal static partial class SimdUtils if (m > 0) { - for (int i = u; i < n; i++) + for (nint i = u; i < n; i++) { Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); } @@ -342,11 +342,11 @@ internal static partial class SimdUtils ref Vector128 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - int n = dest.Length / Vector128.Count; - int m = Numerics.Modulo4(n); - int u = n - m; + nint n = (nint)(uint)dest.Length / Vector128.Count; + nint m = Numerics.Modulo4(n); + nint u = n - m; - for (int i = 0; i < u; i += 4) + for (nint i = 0; i < u; i += 4) { ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); @@ -359,7 +359,7 @@ internal static partial class SimdUtils if (m > 0) { - for (int i = u; i < n; i++) + for (nint i = u; i < n; i++) { Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); } @@ -391,9 +391,9 @@ internal static partial class SimdUtils ref Vector128 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - int n = source.Length / Vector128.Count; + nint n = (nint)(uint)source.Length / Vector128.Count; - for (int i = 0; i < n; i += 3) + for (nint i = 0; i < n; i += 3) { ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); @@ -454,9 +454,9 @@ internal static partial class SimdUtils ref Vector128 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - int n = source.Length / Vector128.Count; + nint n = (nint)(uint)source.Length / Vector128.Count; - for (int i = 0, j = 0; i < n; i += 3, j += 4) + for (nint i = 0, j = 0; i < n; i += 3, j += 4) { ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); Vector128 v1 = Unsafe.Add(ref v0, 1); @@ -498,9 +498,9 @@ internal static partial class SimdUtils ref Vector128 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - int n = source.Length / Vector128.Count; + nint n = 
(nint)(uint)source.Length / Vector128.Count; - for (int i = 0, j = 0; i < n; i += 4, j += 3) + for (nint i = 0, j = 0; i < n; i += 4, j += 3) { ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); @@ -650,16 +650,16 @@ internal static partial class SimdUtils { VerifySpanInput(source, dest, Vector256.Count); - int n = dest.Length / Vector256.Count; + nint n = (nint)(uint)dest.Length / Vector256.Count; ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); var scale = Vector256.Create(1 / (float)byte.MaxValue); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { - int si = Vector256.Count * i; + nint si = Vector256.Count * i; Vector256 i0 = Avx2.ConvertToVector256Int32(sourceBase + si); Vector256 i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256.Count); Vector256 i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 2)); @@ -683,7 +683,7 @@ internal static partial class SimdUtils // Sse VerifySpanInput(source, dest, Vector128.Count); - int n = dest.Length / Vector128.Count; + nint n = (nint)(uint)dest.Length / Vector128.Count; ref Vector128 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); @@ -691,9 +691,9 @@ internal static partial class SimdUtils var scale = Vector128.Create(1 / (float)byte.MaxValue); Vector128 zero = Vector128.Zero; - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { - int si = Vector128.Count * i; + nint si = Vector128.Count * i; Vector128 i0, i1, i2, i3; if (Sse41.IsSupported) @@ -782,7 +782,7 @@ internal static partial class SimdUtils { VerifySpanInput(source, dest, Vector256.Count); - int n = dest.Length / Vector256.Count; + nint n = (nint)(uint)dest.Length / Vector256.Count; ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); @@ -794,7 +794,7 @@ internal static partial class SimdUtils ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32); Vector256 mask = Unsafe.As>(ref maskBase); - for (int i = 
0; i < n; i++) + for (nint i = 0; i < n; i++) { ref Vector256 s = ref Unsafe.Add(ref sourceBase, i * 4); @@ -821,7 +821,7 @@ internal static partial class SimdUtils // Sse VerifySpanInput(source, dest, Vector128.Count); - int n = dest.Length / Vector128.Count; + nint n = (nint)(uint)dest.Length / Vector128.Count; ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); @@ -831,7 +831,7 @@ internal static partial class SimdUtils var scale = Vector128.Create((float)byte.MaxValue); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { ref Vector128 s = ref Unsafe.Add(ref sourceBase, i * 4); @@ -864,7 +864,7 @@ internal static partial class SimdUtils ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); ref byte dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - int count = redChannel.Length / Vector256.Count; + nint count = (nint)(uint)redChannel.Length / Vector256.Count; ref byte control1Bytes = ref MemoryMarshal.GetReference(PermuteMaskEvenOdd8x32); Vector256 control1 = Unsafe.As>(ref control1Bytes); @@ -875,7 +875,7 @@ internal static partial class SimdUtils Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); - for (int i = 0; i < count; i++) + for (nint i = 0; i < count; i++) { Vector256 r0 = Unsafe.Add(ref rBase, i); Vector256 g0 = Unsafe.Add(ref gBase, i); @@ -918,7 +918,7 @@ internal static partial class SimdUtils Unsafe.As>(ref d4) = rgb4; } - int slice = count * Vector256.Count; + int slice = (int)count * Vector256.Count; redChannel = redChannel[slice..]; greenChannel = greenChannel[slice..]; blueChannel = blueChannel[slice..]; @@ -936,12 +936,12 @@ internal static partial class SimdUtils ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); ref Vector256 dBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); - int count = redChannel.Length / Vector256.Count; + nint count = 
(nint)(uint)redChannel.Length / Vector256.Count; ref byte control1Bytes = ref MemoryMarshal.GetReference(PermuteMaskEvenOdd8x32); Vector256 control1 = Unsafe.As>(ref control1Bytes); var a = Vector256.Create((byte)255); - for (int i = 0; i < count; i++) + for (nint i = 0; i < count; i++) { Vector256 r0 = Unsafe.Add(ref rBase, i); Vector256 g0 = Unsafe.Add(ref gBase, i); @@ -970,7 +970,7 @@ internal static partial class SimdUtils Unsafe.Add(ref d0, 3) = rgb4; } - int slice = count * Vector256.Count; + int slice = (int)count * Vector256.Count; redChannel = redChannel[slice..]; greenChannel = greenChannel[slice..]; blueChannel = blueChannel[slice..]; diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 3bd5d28e7..a0a8cd28e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -425,7 +425,7 @@ internal partial struct Block8x8F : IEquatable Vector256 targetVector = Vector256.Create(value); ref Vector256 blockStride = ref this.V0; - for (int i = 0; i < RowCount; i++) + for (nint i = 0; i < RowCount; i++) { Vector256 areEqual = Avx2.CompareEqual(Avx.ConvertToVector256Int32WithTruncation(Unsafe.Add(ref this.V0, i)), targetVector); if (Avx2.MoveMask(areEqual.AsByte()) != equalityMask) @@ -439,7 +439,7 @@ internal partial struct Block8x8F : IEquatable ref float scalars = ref Unsafe.As(ref this); - for (int i = 0; i < Size; i++) + for (nint i = 0; i < Size; i++) { if ((int)Unsafe.Add(ref scalars, i) != value) { diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykAvx.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykAvx.cs index 7d7b7e185..3144afa76 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykAvx.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykAvx.cs @@ -32,7 +32,7 @@ internal 
abstract partial class JpegColorConverterBase // Used for the color conversion var scale = Vector256.Create(1 / (this.MaximumValue * this.MaximumValue)); - nint n = values.Component0.Length / Vector256.Count; + nint n = (nint)(uint)values.Component0.Length / Vector256.Count; for (nint i = 0; i < n; i++) { ref Vector256 c = ref Unsafe.Add(ref c0Base, i); @@ -71,7 +71,7 @@ internal abstract partial class JpegColorConverterBase var scale = Vector256.Create(maxValue); - nint n = values.Component0.Length / Vector256.Count; + nint n = (nint)(uint)values.Component0.Length / Vector256.Count; for (nint i = 0; i < n; i++) { Vector256 ctmp = Avx.Subtract(scale, Unsafe.Add(ref srcR, i)); diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykVector.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykVector.cs index 93dcd378c..03d9a1532 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.CmykVector.cs @@ -30,7 +30,7 @@ internal abstract partial class JpegColorConverterBase var scale = new Vector(1 / (this.MaximumValue * this.MaximumValue)); - nint n = values.Component0.Length / Vector.Count; + nint n = (nint)(uint)values.Component0.Length / Vector.Count; for (nint i = 0; i < n; i++) { ref Vector c = ref Unsafe.Add(ref cBase, i); @@ -78,7 +78,7 @@ internal abstract partial class JpegColorConverterBase // Used for the color conversion var scale = new Vector(maxValue); - nint n = values.Component0.Length / Vector.Count; + nint n = (nint)(uint)values.Component0.Length / Vector.Count; for (nint i = 0; i < n; i++) { Vector ctmp = scale - Unsafe.Add(ref srcR, i); diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleAvx.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleAvx.cs index 9cdbe71e8..4bb986972 100644 --- 
a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleAvx.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleAvx.cs @@ -27,7 +27,7 @@ internal abstract partial class JpegColorConverterBase // Used for the color conversion var scale = Vector256.Create(1 / this.MaximumValue); - nint n = values.Component0.Length / Vector256.Count; + nint n = (nint)(uint)values.Component0.Length / Vector256.Count; for (nint i = 0; i < n; i++) { ref Vector256 c0 = ref Unsafe.Add(ref c0Base, i); @@ -53,7 +53,7 @@ internal abstract partial class JpegColorConverterBase var f0587 = Vector256.Create(0.587f); var f0114 = Vector256.Create(0.114f); - nint n = values.Component0.Length / Vector256.Count; + nint n = (nint)(uint)values.Component0.Length / Vector256.Count; for (nint i = 0; i < n; i++) { ref Vector256 r = ref Unsafe.Add(ref srcRed, i); diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleVector.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleVector.cs index 4d2355b95..d8ba115d2 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.GrayScaleVector.cs @@ -24,7 +24,7 @@ internal abstract partial class JpegColorConverterBase var scale = new Vector(1 / this.MaximumValue); - nint n = values.Component0.Length / Vector.Count; + nint n = (nint)(uint)values.Component0.Length / Vector.Count; for (nint i = 0; i < n; i++) { ref Vector c0 = ref Unsafe.Add(ref cBase, i); @@ -53,7 +53,7 @@ internal abstract partial class JpegColorConverterBase var gMult = new Vector(0.587f); var bMult = new Vector(0.114f); - nint n = values.Component0.Length / Vector.Count; + nint n = (nint)(uint)values.Component0.Length / Vector.Count; for (nint i = 0; i < n; i++) { Vector r = Unsafe.Add(ref srcR, i); diff --git 
a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbAvx.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbAvx.cs index b6c5117d4..76b2e9936 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbAvx.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbAvx.cs @@ -29,7 +29,7 @@ internal abstract partial class JpegColorConverterBase // Used for the color conversion var scale = Vector256.Create(1 / this.MaximumValue); - nint n = values.Component0.Length / Vector256.Count; + nint n = (nint)(uint)values.Component0.Length / Vector256.Count; for (nint i = 0; i < n; i++) { ref Vector256 r = ref Unsafe.Add(ref rBase, i); diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbVector.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbVector.cs index e51b0df4d..5d85bb048 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.RgbVector.cs @@ -28,7 +28,7 @@ internal abstract partial class JpegColorConverterBase var scale = new Vector(1 / this.MaximumValue); - nint n = values.Component0.Length / Vector.Count; + nint n = (nint)(uint)values.Component0.Length / Vector.Count; for (nint i = 0; i < n; i++) { ref Vector r = ref Unsafe.Add(ref rBase, i); diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrAvx.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrAvx.cs index 081b985db..59f24493a 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrAvx.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrAvx.cs @@ -38,7 +38,7 @@ internal abstract partial class JpegColorConverterBase var bCbMult = Vector256.Create(YCbCrScalar.BCbMult); 
// Walking 8 elements at one step: - nint n = values.Component0.Length / Vector256.Count; + nint n = (nint)(uint)values.Component0.Length / Vector256.Count; for (nint i = 0; i < n; i++) { // y = yVals[i]; @@ -98,7 +98,7 @@ internal abstract partial class JpegColorConverterBase var fn0081312F = Vector256.Create(-0.081312F); var f05 = Vector256.Create(0.5f); - nint n = values.Component0.Length / Vector256.Count; + nint n = (nint)(uint)values.Component0.Length / Vector256.Count; for (nint i = 0; i < n; i++) { Vector256 r = Unsafe.Add(ref srcR, i); diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector.cs index 85211d4ab..0f7a36486 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YCbCrVector.cs @@ -35,7 +35,7 @@ internal abstract partial class JpegColorConverterBase var gCrMult = new Vector(-YCbCrScalar.GCrMult); var bCbMult = new Vector(YCbCrScalar.BCbMult); - nint n = values.Component0.Length / Vector.Count; + nint n = (nint)(uint)values.Component0.Length / Vector.Count; for (nint i = 0; i < n; i++) { // y = yVals[i]; @@ -103,7 +103,7 @@ internal abstract partial class JpegColorConverterBase var gCrMult = new Vector(0.418688f); var bCrMult = new Vector(0.081312f); - nint n = values.Component0.Length / Vector.Count; + nint n = (nint)(uint)values.Component0.Length / Vector.Count; for (nint i = 0; i < n; i++) { Vector r = Unsafe.Add(ref srcR, i); diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKAvx.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKAvx.cs index 1f79cbffb..0cfb3201b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKAvx.cs +++ 
b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKAvx.cs @@ -40,7 +40,7 @@ internal abstract partial class JpegColorConverterBase var bCbMult = Vector256.Create(YCbCrScalar.BCbMult); // Walking 8 elements at one step: - nint n = values.Component0.Length / Vector256.Count; + nint n = (nint)(uint)values.Component0.Length / Vector256.Count; for (nint i = 0; i < n; i++) { // y = yVals[i]; @@ -109,7 +109,7 @@ internal abstract partial class JpegColorConverterBase var fn0081312F = Vector256.Create(-0.081312F); var f05 = Vector256.Create(0.5f); - nint n = values.Component0.Length / Vector256.Count; + nint n = (nint)(uint)values.Component0.Length / Vector256.Count; for (nint i = 0; i < n; i++) { Vector256 r = Avx.Subtract(maxSampleValue, Unsafe.Add(ref srcR, i)); diff --git a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKVector.cs b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKVector.cs index 91a6cedc0..feefe3021 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ColorConverters/JpegColorConverter.YccKVector.cs @@ -36,7 +36,7 @@ internal abstract partial class JpegColorConverterBase var gCrMult = new Vector(-YCbCrScalar.GCrMult); var bCbMult = new Vector(YCbCrScalar.BCbMult); - nint n = values.Component0.Length / Vector.Count; + nint n = (nint)(uint)values.Component0.Length / Vector.Count; for (nint i = 0; i < n; i++) { // y = yVals[i]; @@ -107,7 +107,7 @@ internal abstract partial class JpegColorConverterBase var gCrMult = new Vector(0.418688f); var bCrMult = new Vector(0.081312f); - nint n = values.Component0.Length / Vector.Count; + nint n = (nint)(uint)values.Component0.Length / Vector.Count; for (nint i = 0; i < n; i++) { Vector r = maxSampleValue - Unsafe.Add(ref srcR, i); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/ComponentProcessor.cs 
b/src/ImageSharp/Formats/Jpeg/Components/Encoder/ComponentProcessor.cs index 2bc140550..23ddd0e49 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/ComponentProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/ComponentProcessor.cs @@ -122,7 +122,7 @@ internal class ComponentProcessor : IDisposable ref Vector256 sourceVectorRef = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); // Spans are guaranteed to be multiple of 8 so no extra 'remainder' steps are needed - nint count = source.Length / Vector256.Count; + nint count = (nint)(uint)source.Length / Vector256.Count; for (nint i = 0; i < count; i++) { Unsafe.Add(ref targetVectorRef, i) = Avx.Add(Unsafe.Add(ref targetVectorRef, i), Unsafe.Add(ref sourceVectorRef, i)); @@ -133,7 +133,7 @@ internal class ComponentProcessor : IDisposable ref Vector targetVectorRef = ref Unsafe.As>(ref MemoryMarshal.GetReference(target)); ref Vector sourceVectorRef = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - nint count = source.Length / Vector.Count; + nint count = (nint)(uint)source.Length / Vector.Count; for (nint i = 0; i < count; i++) { Unsafe.Add(ref targetVectorRef, i) += Unsafe.Add(ref sourceVectorRef, i); @@ -200,7 +200,7 @@ internal class ComponentProcessor : IDisposable ref Vector256 targetVectorRef = ref Unsafe.As>(ref MemoryMarshal.GetReference(target)); // Spans are guaranteed to be multiple of 8 so no extra 'remainder' steps are needed - nint count = target.Length / Vector256.Count; + nint count = (nint)(uint)target.Length / Vector256.Count; var multiplierVector = Vector256.Create(multiplier); for (nint i = 0; i < count; i++) { @@ -211,7 +211,7 @@ internal class ComponentProcessor : IDisposable { ref Vector targetVectorRef = ref Unsafe.As>(ref MemoryMarshal.GetReference(target)); - nint count = target.Length / Vector.Count; + nint count = (nint)(uint)target.Length / Vector.Count; var multiplierVector = new Vector(multiplier); for (nint i = 0; i < count; i++) { diff --git 
a/tests/ImageSharp.Benchmarks/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Bulk/FromVector4.cs index dd3fb8ac8..0637b3334 100644 --- a/tests/ImageSharp.Benchmarks/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Bulk/FromVector4.cs @@ -103,7 +103,7 @@ public class FromVector4Rgba32 : FromVector4 Span src = MemoryMarshal.Cast(this.source.GetSpan()); Span dest = MemoryMarshal.Cast(this.destination.GetSpan()); - int n = dest.Length / Vector.Count; + nint n = (nint)(uint)dest.Length / Vector.Count; ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(src)); @@ -114,7 +114,7 @@ public class FromVector4Rgba32 : FromVector4 var maxBytes = Vector256.Create(255f); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { ref Vector256 s = ref Unsafe.Add(ref sourceBase, i * 4); diff --git a/tests/ImageSharp.Benchmarks/Bulk/ToVector4_Rgba32.cs b/tests/ImageSharp.Benchmarks/Bulk/ToVector4_Rgba32.cs index b0eb6b46d..913ab24dc 100644 --- a/tests/ImageSharp.Benchmarks/Bulk/ToVector4_Rgba32.cs +++ b/tests/ImageSharp.Benchmarks/Bulk/ToVector4_Rgba32.cs @@ -54,13 +54,13 @@ public class ToVector4_Rgba32 : ToVector4 Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); - int n = dFloats.Length / Vector.Count; + nint n = (nint)(uint)dFloats.Length / Vector.Count; ref Vector sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference((ReadOnlySpan)sBytes)); ref Vector destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dFloats)); ref Vector destBaseU = ref Unsafe.As, Vector>(ref destBase); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { Vector b = Unsafe.Add(ref sourceBase, i); @@ -75,10 +75,10 @@ public class ToVector4_Rgba32 : ToVector4 Unsafe.Add(ref d, 3) = w3; } - n = dFloats.Length / Vector.Count; + n = (nint)(uint)dFloats.Length / Vector.Count; var scale = new Vector(1f / 255f); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { ref Vector dRef = ref 
Unsafe.Add(ref destBase, i); @@ -96,13 +96,13 @@ public class ToVector4_Rgba32 : ToVector4 Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); - int n = dFloats.Length / Vector.Count; + nint n = (nint)(uint)dFloats.Length / Vector.Count; ref Vector sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference((ReadOnlySpan)sBytes)); ref Vector destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dFloats)); var scale = new Vector(1f / 255f); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { Vector b = Unsafe.Add(ref sourceBase, i); diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs index 86ac928af..061b1e126 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs @@ -205,14 +205,14 @@ public unsafe class PixelConversion_PackFromRgbPlanes ref Vector256 bBase = ref Unsafe.As>(ref this.bFloat[0]); ref Vector256 resultBase = ref Unsafe.As>(ref this.rgbaFloat[0]); - int count = this.Count / Vector256.Count; + nint count = (nint)(uint)this.Count / Vector256.Count; ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); Vector256 vcontrol = Unsafe.As>(ref control); var va = Vector256.Create(1F); - for (int i = 0; i < count; i++) + for (nint i = 0; i < count; i++) { Vector256 r = Unsafe.Add(ref rBase, i); Vector256 g = Unsafe.Add(ref gBase, i); diff --git a/tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs b/tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs index 63d363c68..57c7b6faf 100644 --- a/tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs +++ b/tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs @@ 
-25,14 +25,14 @@ public class UInt32ToSingle { ref Vector b = ref Unsafe.As>(ref this.data[0]); - int n = Count / Vector.Count; + nint n = Count / Vector.Count; var bVec = new Vector(256.0f / 255.0f); var magicFloat = new Vector(32768.0f); var magicInt = new Vector(1191182336); // reinterpreted value of 32768.0f var mask = new Vector(255); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { ref Vector df = ref Unsafe.Add(ref b, i); @@ -50,14 +50,14 @@ public class UInt32ToSingle [Benchmark] public void StandardSimd() { - int n = Count / Vector.Count; + nint n = Count / Vector.Count; ref Vector bf = ref Unsafe.As>(ref this.data[0]); ref Vector bu = ref Unsafe.As, Vector>(ref bf); var scale = new Vector(1f / 255f); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { Vector u = Unsafe.Add(ref bu, i); Vector v = Vector.ConvertToSingle(u); @@ -69,14 +69,14 @@ public class UInt32ToSingle [Benchmark] public void StandardSimdFromInt() { - int n = Count / Vector.Count; + nint n = Count / Vector.Count; ref Vector bf = ref Unsafe.As>(ref this.data[0]); ref Vector bu = ref Unsafe.As, Vector>(ref bf); var scale = new Vector(1f / 255f); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { Vector u = Unsafe.Add(ref bu, i); Vector v = Vector.ConvertToSingle(u); @@ -88,12 +88,12 @@ public class UInt32ToSingle [Benchmark] public void StandardSimdFromInt_RefCast() { - int n = Count / Vector.Count; + nint n = Count / Vector.Count; ref Vector bf = ref Unsafe.As>(ref this.data[0]); var scale = new Vector(1f / 255f); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { ref Vector fRef = ref Unsafe.Add(ref bf, i); diff --git a/tests/ImageSharp.Benchmarks/General/Vectorization/VectorFetching.cs b/tests/ImageSharp.Benchmarks/General/Vectorization/VectorFetching.cs index 07ace0668..7da2626dc 100644 --- a/tests/ImageSharp.Benchmarks/General/Vectorization/VectorFetching.cs +++ b/tests/ImageSharp.Benchmarks/General/Vectorization/VectorFetching.cs @@ 
-63,14 +63,14 @@ public class VectorFetching var v = new Vector(this.testValue); ref Vector start = ref Unsafe.As>(ref this.data[0]); - int n = this.InputSize / Vector.Count; + nint n = (nint)(uint)this.InputSize / Vector.Count; - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { ref Vector p = ref Unsafe.Add(ref start, i); Vector a = p; - a = a * v; + a *= v; p = a; } @@ -82,12 +82,12 @@ public class VectorFetching var v = new Vector(this.testValue); ref Vector start = ref Unsafe.As>(ref this.data[0]); - int n = this.InputSize / Vector.Count; + nint n = (nint)(uint)this.InputSize / Vector.Count; - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { ref Vector a = ref Unsafe.Add(ref start, i); - a = a * v; + a *= v; } } @@ -100,12 +100,12 @@ public class VectorFetching ref Vector start = ref Unsafe.As>(ref MemoryMarshal.GetReference(span)); - int n = this.InputSize / Vector.Count; + nint n = (nint)(uint)this.InputSize / Vector.Count; - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { ref Vector a = ref Unsafe.Add(ref start, i); - a = a * v; + a *= v; } } } diff --git a/tests/ImageSharp.Benchmarks/General/Vectorization/WidenBytesToUInt32.cs b/tests/ImageSharp.Benchmarks/General/Vectorization/WidenBytesToUInt32.cs index 429475ffd..4615376a5 100644 --- a/tests/ImageSharp.Benchmarks/General/Vectorization/WidenBytesToUInt32.cs +++ b/tests/ImageSharp.Benchmarks/General/Vectorization/WidenBytesToUInt32.cs @@ -42,12 +42,12 @@ public class WidenBytesToUInt32 [Benchmark] public void Simd() { - int n = Count / Vector.Count; + nint n = Count / Vector.Count; ref Vector sBase = ref Unsafe.As>(ref this.source[0]); ref Vector dBase = ref Unsafe.As>(ref this.dest[0]); - for (int i = 0; i < n; i++) + for (nint i = 0; i < n; i++) { Vector b = Unsafe.Add(ref sBase, i);