diff --git a/.gitignore b/.gitignore
index 769a40c6c..fadf36964 100644
--- a/.gitignore
+++ b/.gitignore
@@ -223,3 +223,7 @@ artifacts/
**/Images/ReferenceOutput
**/Images/Input/MemoryStress
.DS_Store
+
+#lfs
+hooks/**
+lfs/**
diff --git a/shared-infrastructure b/shared-infrastructure
index f48ab8291..a042aba17 160000
--- a/shared-infrastructure
+++ b/shared-infrastructure
@@ -1 +1 @@
-Subproject commit f48ab829167c42c69242ed0d303683232fbfccd1
+Subproject commit a042aba176cdb840d800c6ed4cfe41a54fb7b1e3
diff --git a/src/ImageSharp/Advanced/ParallelExecutionSettings.cs b/src/ImageSharp/Advanced/ParallelExecutionSettings.cs
index 5415249d2..e1f36d9d6 100644
--- a/src/ImageSharp/Advanced/ParallelExecutionSettings.cs
+++ b/src/ImageSharp/Advanced/ParallelExecutionSettings.cs
@@ -3,7 +3,6 @@
using System;
using System.Threading.Tasks;
-
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Advanced
diff --git a/src/ImageSharp/Color/Color.Conversions.cs b/src/ImageSharp/Color/Color.Conversions.cs
index cd3fc8fd9..0455fd26a 100644
--- a/src/ImageSharp/Color/Color.Conversions.cs
+++ b/src/ImageSharp/Color/Color.Conversions.cs
@@ -3,7 +3,6 @@
using System.Numerics;
using System.Runtime.CompilerServices;
-
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
@@ -95,4 +94,4 @@ namespace SixLabors.ImageSharp
[MethodImpl(InliningOptions.ShortMethod)]
internal Vector4 ToVector4() => this.data.ToVector4();
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Color/Color.WebSafePalette.cs b/src/ImageSharp/Color/Color.WebSafePalette.cs
index cad6553c0..1cffb841c 100644
--- a/src/ImageSharp/Color/Color.WebSafePalette.cs
+++ b/src/ImageSharp/Color/Color.WebSafePalette.cs
@@ -163,4 +163,4 @@ namespace SixLabors.ImageSharp
YellowGreen
};
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs
index 72f16528a..d5eedc160 100644
--- a/src/ImageSharp/Color/Color.cs
+++ b/src/ImageSharp/Color/Color.cs
@@ -5,7 +5,6 @@ using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
-
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
diff --git a/src/ImageSharp/ColorSpaces/CieLab.cs b/src/ImageSharp/ColorSpaces/CieLab.cs
index 4d25836ec..c1b9aab37 100644
--- a/src/ImageSharp/ColorSpaces/CieLab.cs
+++ b/src/ImageSharp/ColorSpaces/CieLab.cs
@@ -136,4 +136,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.WhitePoint.Equals(other.WhitePoint);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/CieLch.cs b/src/ImageSharp/ColorSpaces/CieLch.cs
index 3e94790bb..7722b705e 100644
--- a/src/ImageSharp/ColorSpaces/CieLch.cs
+++ b/src/ImageSharp/ColorSpaces/CieLch.cs
@@ -162,4 +162,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
return result;
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/CieLchuv.cs b/src/ImageSharp/ColorSpaces/CieLchuv.cs
index 272c53556..ed8e72fc9 100644
--- a/src/ImageSharp/ColorSpaces/CieLchuv.cs
+++ b/src/ImageSharp/ColorSpaces/CieLchuv.cs
@@ -157,4 +157,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
return result;
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/CieLuv.cs b/src/ImageSharp/ColorSpaces/CieLuv.cs
index b11447fa7..6b69b9088 100644
--- a/src/ImageSharp/ColorSpaces/CieLuv.cs
+++ b/src/ImageSharp/ColorSpaces/CieLuv.cs
@@ -137,4 +137,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.WhitePoint.Equals(other.WhitePoint);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/CieXyy.cs b/src/ImageSharp/ColorSpaces/CieXyy.cs
index 526c03831..5e3b444ac 100644
--- a/src/ImageSharp/ColorSpaces/CieXyy.cs
+++ b/src/ImageSharp/ColorSpaces/CieXyy.cs
@@ -100,4 +100,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.Yl.Equals(other.Yl);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/CieXyz.cs b/src/ImageSharp/ColorSpaces/CieXyz.cs
index aaf48c0b9..ceffd727d 100644
--- a/src/ImageSharp/ColorSpaces/CieXyz.cs
+++ b/src/ImageSharp/ColorSpaces/CieXyz.cs
@@ -103,4 +103,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.Z.Equals(other.Z);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Cmyk.cs b/src/ImageSharp/ColorSpaces/Cmyk.cs
index 675f1f814..fb8efad63 100644
--- a/src/ImageSharp/ColorSpaces/Cmyk.cs
+++ b/src/ImageSharp/ColorSpaces/Cmyk.cs
@@ -108,4 +108,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.K.Equals(other.K);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Companding/GammaCompanding.cs b/src/ImageSharp/ColorSpaces/Companding/GammaCompanding.cs
index b72332ebe..440aa4185 100644
--- a/src/ImageSharp/ColorSpaces/Companding/GammaCompanding.cs
+++ b/src/ImageSharp/ColorSpaces/Companding/GammaCompanding.cs
@@ -33,4 +33,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
[MethodImpl(InliningOptions.ShortMethod)]
public static float Compress(float channel, float gamma) => MathF.Pow(channel, 1 / gamma);
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Companding/Rec2020Companding.cs b/src/ImageSharp/ColorSpaces/Companding/Rec2020Companding.cs
index 2eb2537fc..957c07687 100644
--- a/src/ImageSharp/ColorSpaces/Companding/Rec2020Companding.cs
+++ b/src/ImageSharp/ColorSpaces/Companding/Rec2020Companding.cs
@@ -38,4 +38,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
public static float Compress(float channel)
=> channel < Beta ? 4.5F * channel : (Alpha * MathF.Pow(channel, 0.45F)) - AlphaMinusOne;
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Companding/Rec709Companding.cs b/src/ImageSharp/ColorSpaces/Companding/Rec709Companding.cs
index cf6f97e44..8b511aa1c 100644
--- a/src/ImageSharp/ColorSpaces/Companding/Rec709Companding.cs
+++ b/src/ImageSharp/ColorSpaces/Companding/Rec709Companding.cs
@@ -34,4 +34,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
public static float Compress(float channel)
=> channel < 0.018F ? 4.5F * channel : (1.099F * MathF.Pow(channel, 0.45F)) - 0.099F;
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/CieConstants.cs b/src/ImageSharp/ColorSpaces/Conversion/CieConstants.cs
index a81845f21..0d3568a2a 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/CieConstants.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/CieConstants.cs
@@ -19,4 +19,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
///
public const float Kappa = 903.2963F;
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.HunterLab.cs b/src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.HunterLab.cs
index 17cbcbbd5..147ffba70 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.HunterLab.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.HunterLab.cs
@@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
@@ -429,4 +429,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return this.ToHunterLab(xyzColor);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.Lms.cs b/src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.Lms.cs
index cb5907424..7f44a3e4b 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.Lms.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.Lms.cs
@@ -424,4 +424,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return this.ToLms(xyzColor);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CIeLchToCieLabConverter.cs b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CIeLchToCieLabConverter.cs
index 2b60b2861..0b6ca4071 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CIeLchToCieLabConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CIeLchToCieLabConverter.cs
@@ -30,4 +30,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new CieLab(l, a, b, input.WhitePoint);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndCieXyyConverter.cs b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndCieXyyConverter.cs
index 2e048031b..ea021d73c 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndCieXyyConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndCieXyyConverter.cs
@@ -51,4 +51,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new CieXyz(x, y, z);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndHunterLabConverterBase.cs b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndHunterLabConverterBase.cs
index 761558676..7ed2d78d8 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndHunterLabConverterBase.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndHunterLabConverterBase.cs
@@ -42,4 +42,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return 100F * (70F / 218.11F) * (whitePoint.Y + whitePoint.Z);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndLmsConverter.cs b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndLmsConverter.cs
index 0a6ba15fe..22f081ccd 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndLmsConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndLmsConverter.cs
@@ -67,4 +67,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new CieXyz(vector);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLabConverter.cs b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLabConverter.cs
index 7a9016261..5f16a82a4 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLabConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLabConverter.cs
@@ -54,4 +54,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new CieLab(l, a, b, this.LabWhitePoint);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLuvConverter.cs b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLuvConverter.cs
index 45e7589ce..031d96e71 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLuvConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLuvConverter.cs
@@ -85,4 +85,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
private static float ComputeVp(in CieXyz input)
=> (9 * input.Y) / (input.X + (15 * input.Y) + (3 * input.Z));
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToHunterLabConverter.cs b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToHunterLabConverter.cs
index 2bf1bb720..0b70f8c85 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToHunterLabConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToHunterLabConverter.cs
@@ -64,4 +64,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new HunterLab(l, a, b, this.HunterLabWhitePoint);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToLinearRgbConverter.cs b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToLinearRgbConverter.cs
index b14705a2d..f6ee2b0d8 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToLinearRgbConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToLinearRgbConverter.cs
@@ -53,4 +53,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new LinearRgb(vector, this.TargetWorkingSpace);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CmykAndRgbConverter.cs b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CmykAndRgbConverter.cs
index 38c03ca18..72f543442 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CmykAndRgbConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CmykAndRgbConverter.cs
@@ -48,4 +48,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new Cmyk(cmy.X, cmy.Y, cmy.Z, k.X);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/YCbCrAndRgbConverter.cs b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/YCbCrAndRgbConverter.cs
index 0ae244848..3f90e8d71 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/YCbCrAndRgbConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/YCbCrAndRgbConverter.cs
@@ -54,4 +54,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new YCbCr(y, cb, cr);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Conversion/Implementation/IChromaticAdaptation.cs b/src/ImageSharp/ColorSpaces/Conversion/Implementation/IChromaticAdaptation.cs
index 62833475d..b787c48b3 100644
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/IChromaticAdaptation.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/IChromaticAdaptation.cs
@@ -36,4 +36,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
CieXyz sourceWhitePoint,
in CieXyz destinationWhitePoint);
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Hsl.cs b/src/ImageSharp/ColorSpaces/Hsl.cs
index 9df5b4656..740752e6d 100644
--- a/src/ImageSharp/ColorSpaces/Hsl.cs
+++ b/src/ImageSharp/ColorSpaces/Hsl.cs
@@ -101,4 +101,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.L.Equals(other.L);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Hsv.cs b/src/ImageSharp/ColorSpaces/Hsv.cs
index 40474621a..d29e4b5b7 100644
--- a/src/ImageSharp/ColorSpaces/Hsv.cs
+++ b/src/ImageSharp/ColorSpaces/Hsv.cs
@@ -99,4 +99,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.V.Equals(other.V);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/HunterLab.cs b/src/ImageSharp/ColorSpaces/HunterLab.cs
index 4a0acadf4..a36ad4b9e 100644
--- a/src/ImageSharp/ColorSpaces/HunterLab.cs
+++ b/src/ImageSharp/ColorSpaces/HunterLab.cs
@@ -135,4 +135,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.WhitePoint.Equals(other.WhitePoint);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Illuminants.cs b/src/ImageSharp/ColorSpaces/Illuminants.cs
index 11b66d43b..f22ab9cd0 100644
--- a/src/ImageSharp/ColorSpaces/Illuminants.cs
+++ b/src/ImageSharp/ColorSpaces/Illuminants.cs
@@ -69,4 +69,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
///
public static readonly CieXyz F11 = new CieXyz(1.00962F, 1F, 0.64350F);
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/ColorSpaces/Lms.cs b/src/ImageSharp/ColorSpaces/Lms.cs
index fa6800343..e0068c92f 100644
--- a/src/ImageSharp/ColorSpaces/Lms.cs
+++ b/src/ImageSharp/ColorSpaces/Lms.cs
@@ -104,4 +104,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.S.Equals(other.S);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Common/Constants.cs b/src/ImageSharp/Common/Constants.cs
index fd2636100..90f33fdf7 100644
--- a/src/ImageSharp/Common/Constants.cs
+++ b/src/ImageSharp/Common/Constants.cs
@@ -18,4 +18,4 @@ namespace SixLabors.ImageSharp
///
public static readonly float EpsilonSquared = Epsilon * Epsilon;
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Common/Helpers/InliningOptions.cs b/src/ImageSharp/Common/Helpers/InliningOptions.cs
index 4bc8ef3c8..1ae880787 100644
--- a/src/ImageSharp/Common/Helpers/InliningOptions.cs
+++ b/src/ImageSharp/Common/Helpers/InliningOptions.cs
@@ -12,6 +12,10 @@ namespace SixLabors.ImageSharp
///
internal static class InliningOptions
{
+ /// <summary>
+ /// <see cref="MethodImplOptions.AggressiveInlining"/> regardless of the build conditions.
+ /// </summary>
+ public const MethodImplOptions AlwaysInline = MethodImplOptions.AggressiveInlining;
#if PROFILING
public const MethodImplOptions HotPath = MethodImplOptions.NoInlining;
public const MethodImplOptions ShortMethod = MethodImplOptions.NoInlining;
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
index b530a37e7..cd96b51e9 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -537,7 +537,7 @@ namespace SixLabors.ImageSharp
/// The first vector to multiply.
/// The second vector to multiply.
/// <returns>The <see cref="Vector256{T}"/>.</returns>
- [MethodImpl(InliningOptions.ShortMethod)]
+ [MethodImpl(InliningOptions.AlwaysInline)]
public static Vector256 MultiplyAdd(
in Vector256 va,
in Vector256 vm0,
@@ -622,90 +622,89 @@ namespace SixLabors.ImageSharp
ReadOnlySpan source,
Span dest)
{
- if (Avx2.IsSupported)
+ fixed (byte* sourceBase = source)
{
- VerifySpanInput(source, dest, Vector256.Count);
-
- int n = dest.Length / Vector256.Count;
+ if (Avx2.IsSupported)
+ {
+ VerifySpanInput(source, dest, Vector256.Count);
- byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source));
+ int n = dest.Length / Vector256.Count;
- ref Vector256 destBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
+ ref Vector256 destBase =
+ ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
- var scale = Vector256.Create(1 / (float)byte.MaxValue);
+ var scale = Vector256.Create(1 / (float)byte.MaxValue);
- for (int i = 0; i < n; i++)
- {
- int si = Vector256.Count * i;
- Vector256 i0 = Avx2.ConvertToVector256Int32(sourceBase + si);
- Vector256 i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256.Count);
- Vector256 i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 2));
- Vector256 i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 3));
-
- Vector256 f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0));
- Vector256 f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1));
- Vector256 f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2));
- Vector256 f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3));
-
- ref Vector256 d = ref Unsafe.Add(ref destBase, i * 4);
-
- d = f0;
- Unsafe.Add(ref d, 1) = f1;
- Unsafe.Add(ref d, 2) = f2;
- Unsafe.Add(ref d, 3) = f3;
+ for (int i = 0; i < n; i++)
+ {
+ int si = Vector256.Count * i;
+ Vector256 i0 = Avx2.ConvertToVector256Int32(sourceBase + si);
+ Vector256 i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256.Count);
+ Vector256 i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 2));
+ Vector256 i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 3));
+
+ Vector256 f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0));
+ Vector256 f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1));
+ Vector256 f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2));
+ Vector256 f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3));
+
+ ref Vector256 d = ref Unsafe.Add(ref destBase, i * 4);
+
+ d = f0;
+ Unsafe.Add(ref d, 1) = f1;
+ Unsafe.Add(ref d, 2) = f2;
+ Unsafe.Add(ref d, 3) = f3;
+ }
}
- }
- else
- {
- // Sse
- VerifySpanInput(source, dest, Vector128.Count);
-
- int n = dest.Length / Vector128.Count;
-
- byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source));
+ else
+ {
+ // Sse
+ VerifySpanInput(source, dest, Vector128.Count);
- ref Vector128 destBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
+ int n = dest.Length / Vector128.Count;
- var scale = Vector128.Create(1 / (float)byte.MaxValue);
- Vector128 zero = Vector128.Zero;
+ ref Vector128 destBase =
+ ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
- for (int i = 0; i < n; i++)
- {
- int si = Vector128.Count * i;
+ var scale = Vector128.Create(1 / (float)byte.MaxValue);
+ Vector128 zero = Vector128.Zero;
- Vector128 i0, i1, i2, i3;
- if (Sse41.IsSupported)
- {
- i0 = Sse41.ConvertToVector128Int32(sourceBase + si);
- i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128.Count);
- i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 2));
- i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 3));
- }
- else
+ for (int i = 0; i < n; i++)
{
- Vector128 b = Sse2.LoadVector128(sourceBase + si);
- Vector128 s0 = Sse2.UnpackLow(b, zero).AsInt16();
- Vector128 s1 = Sse2.UnpackHigh(b, zero).AsInt16();
-
- i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32();
- i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32();
- i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32();
- i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32();
+ int si = Vector128.Count * i;
+
+ Vector128 i0, i1, i2, i3;
+ if (Sse41.IsSupported)
+ {
+ i0 = Sse41.ConvertToVector128Int32(sourceBase + si);
+ i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128.Count);
+ i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 2));
+ i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 3));
+ }
+ else
+ {
+ Vector128 b = Sse2.LoadVector128(sourceBase + si);
+ Vector128 s0 = Sse2.UnpackLow(b, zero).AsInt16();
+ Vector128 s1 = Sse2.UnpackHigh(b, zero).AsInt16();
+
+ i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32();
+ i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32();
+ i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32();
+ i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32();
+ }
+
+ Vector128 f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0));
+ Vector128 f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1));
+ Vector128 f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2));
+ Vector128 f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3));
+
+ ref Vector128 d = ref Unsafe.Add(ref destBase, i * 4);
+
+ d = f0;
+ Unsafe.Add(ref d, 1) = f1;
+ Unsafe.Add(ref d, 2) = f2;
+ Unsafe.Add(ref d, 3) = f3;
}
-
- Vector128 f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0));
- Vector128 f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1));
- Vector128 f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2));
- Vector128 f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3));
-
- ref Vector128 d = ref Unsafe.Add(ref destBase, i * 4);
-
- d = f0;
- Unsafe.Add(ref d, 1) = f1;
- Unsafe.Add(ref d, 2) = f2;
- Unsafe.Add(ref d, 3) = f3;
}
}
}
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
index fe02bd007..1ccf5ab1a 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
@@ -5,9 +5,7 @@ using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
-
#if SUPPORTS_RUNTIME_INTRINSICS
-using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
@@ -203,4 +201,4 @@ namespace SixLabors.ImageSharp
}
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Compression/Zlib/Adler32.cs b/src/ImageSharp/Compression/Zlib/Adler32.cs
index 9b3abd298..7eb3f4516 100644
--- a/src/ImageSharp/Compression/Zlib/Adler32.cs
+++ b/src/ImageSharp/Compression/Zlib/Adler32.cs
@@ -91,115 +91,117 @@ namespace SixLabors.ImageSharp.Compression.Zlib
int index = 0;
fixed (byte* bufferPtr = buffer)
- fixed (byte* tapPtr = Tap1Tap2)
{
- index += (int)blocks * BLOCK_SIZE;
- var localBufferPtr = bufferPtr;
-
- // _mm_setr_epi8 on x86
- Vector128 tap1 = Sse2.LoadVector128((sbyte*)tapPtr);
- Vector128 tap2 = Sse2.LoadVector128((sbyte*)(tapPtr + 0x10));
- Vector128 zero = Vector128.Zero;
- var ones = Vector128.Create((short)1);
-
- while (blocks > 0)
+ fixed (byte* tapPtr = Tap1Tap2)
{
- uint n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */
- if (n > blocks)
- {
- n = blocks;
- }
+ index += (int)blocks * BLOCK_SIZE;
+ var localBufferPtr = bufferPtr;
- blocks -= n;
+ // _mm_setr_epi8 on x86
+ Vector128 tap1 = Sse2.LoadVector128((sbyte*)tapPtr);
+ Vector128 tap2 = Sse2.LoadVector128((sbyte*)(tapPtr + 0x10));
+ Vector128 zero = Vector128.Zero;
+ var ones = Vector128.Create((short)1);
- // Process n blocks of data. At most NMAX data bytes can be
- // processed before s2 must be reduced modulo BASE.
- Vector128 v_ps = Vector128.CreateScalar(s1 * n);
- Vector128 v_s2 = Vector128.CreateScalar(s2);
- Vector128 v_s1 = Vector128.Zero;
-
- do
+ while (blocks > 0)
{
- // Load 32 input bytes.
- Vector128 bytes1 = Sse3.LoadDquVector128(localBufferPtr);
- Vector128 bytes2 = Sse3.LoadDquVector128(localBufferPtr + 0x10);
+ uint n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */
+ if (n > blocks)
+ {
+ n = blocks;
+ }
- // Add previous block byte sum to v_ps.
- v_ps = Sse2.Add(v_ps, v_s1);
+ blocks -= n;
- // Horizontally add the bytes for s1, multiply-adds the
- // bytes by [ 32, 31, 30, ... ] for s2.
- v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsUInt32());
- Vector128 mad1 = Ssse3.MultiplyAddAdjacent(bytes1, tap1);
- v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones).AsUInt32());
+ // Process n blocks of data. At most NMAX data bytes can be
+ // processed before s2 must be reduced modulo BASE.
+ Vector128 v_ps = Vector128.CreateScalar(s1 * n);
+ Vector128 v_s2 = Vector128.CreateScalar(s2);
+ Vector128 v_s1 = Vector128.Zero;
- v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsUInt32());
- Vector128 mad2 = Ssse3.MultiplyAddAdjacent(bytes2, tap2);
- v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones).AsUInt32());
+ do
+ {
+ // Load 32 input bytes.
+ Vector128 bytes1 = Sse3.LoadDquVector128(localBufferPtr);
+ Vector128 bytes2 = Sse3.LoadDquVector128(localBufferPtr + 0x10);
- localBufferPtr += BLOCK_SIZE;
- }
- while (--n > 0);
+ // Add previous block byte sum to v_ps.
+ v_ps = Sse2.Add(v_ps, v_s1);
- v_s2 = Sse2.Add(v_s2, Sse2.ShiftLeftLogical(v_ps, 5));
+ // Horizontally add the bytes for s1, multiply-adds the
+ // bytes by [ 32, 31, 30, ... ] for s2.
+ v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsUInt32());
+ Vector128 mad1 = Ssse3.MultiplyAddAdjacent(bytes1, tap1);
+ v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones).AsUInt32());
- // Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
- const byte S2301 = 0b1011_0001; // A B C D -> B A D C
- const byte S1032 = 0b0100_1110; // A B C D -> C D A B
+ v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsUInt32());
+ Vector128 mad2 = Ssse3.MultiplyAddAdjacent(bytes2, tap2);
+ v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones).AsUInt32());
- v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S1032));
+ localBufferPtr += BLOCK_SIZE;
+ }
+ while (--n > 0);
- s1 += v_s1.ToScalar();
+ v_s2 = Sse2.Add(v_s2, Sse2.ShiftLeftLogical(v_ps, 5));
- v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S2301));
- v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S1032));
+ // Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
+ const byte S2301 = 0b1011_0001; // A B C D -> B A D C
+ const byte S1032 = 0b0100_1110; // A B C D -> C D A B
- s2 = v_s2.ToScalar();
+ v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S1032));
- // Reduce.
- s1 %= BASE;
- s2 %= BASE;
- }
+ s1 += v_s1.ToScalar();
- if (length > 0)
- {
- if (length >= 16)
- {
- s2 += s1 += localBufferPtr[0];
- s2 += s1 += localBufferPtr[1];
- s2 += s1 += localBufferPtr[2];
- s2 += s1 += localBufferPtr[3];
- s2 += s1 += localBufferPtr[4];
- s2 += s1 += localBufferPtr[5];
- s2 += s1 += localBufferPtr[6];
- s2 += s1 += localBufferPtr[7];
- s2 += s1 += localBufferPtr[8];
- s2 += s1 += localBufferPtr[9];
- s2 += s1 += localBufferPtr[10];
- s2 += s1 += localBufferPtr[11];
- s2 += s1 += localBufferPtr[12];
- s2 += s1 += localBufferPtr[13];
- s2 += s1 += localBufferPtr[14];
- s2 += s1 += localBufferPtr[15];
+ v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S2301));
+ v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S1032));
- localBufferPtr += 16;
- length -= 16;
- }
+ s2 = v_s2.ToScalar();
- while (length-- > 0)
- {
- s2 += s1 += *localBufferPtr++;
+ // Reduce.
+ s1 %= BASE;
+ s2 %= BASE;
}
- if (s1 >= BASE)
+ if (length > 0)
{
- s1 -= BASE;
+ if (length >= 16)
+ {
+ s2 += s1 += localBufferPtr[0];
+ s2 += s1 += localBufferPtr[1];
+ s2 += s1 += localBufferPtr[2];
+ s2 += s1 += localBufferPtr[3];
+ s2 += s1 += localBufferPtr[4];
+ s2 += s1 += localBufferPtr[5];
+ s2 += s1 += localBufferPtr[6];
+ s2 += s1 += localBufferPtr[7];
+ s2 += s1 += localBufferPtr[8];
+ s2 += s1 += localBufferPtr[9];
+ s2 += s1 += localBufferPtr[10];
+ s2 += s1 += localBufferPtr[11];
+ s2 += s1 += localBufferPtr[12];
+ s2 += s1 += localBufferPtr[13];
+ s2 += s1 += localBufferPtr[14];
+ s2 += s1 += localBufferPtr[15];
+
+ localBufferPtr += 16;
+ length -= 16;
+ }
+
+ while (length-- > 0)
+ {
+ s2 += s1 += *localBufferPtr++;
+ }
+
+ if (s1 >= BASE)
+ {
+ s1 -= BASE;
+ }
+
+ s2 %= BASE;
}
- s2 %= BASE;
+ return s1 | (s2 << 16);
}
-
- return s1 | (s2 << 16);
}
}
#endif
diff --git a/src/ImageSharp/Compression/Zlib/Crc32.cs b/src/ImageSharp/Compression/Zlib/Crc32.cs
index 0ba368df6..075d6112a 100644
--- a/src/ImageSharp/Compression/Zlib/Crc32.cs
+++ b/src/ImageSharp/Compression/Zlib/Crc32.cs
@@ -83,117 +83,119 @@ namespace SixLabors.ImageSharp.Compression.Zlib
int length = chunksize;
fixed (byte* bufferPtr = buffer)
- fixed (ulong* k05PolyPtr = K05Poly)
{
- byte* localBufferPtr = bufferPtr;
- ulong* localK05PolyPtr = k05PolyPtr;
-
- // There's at least one block of 64.
- Vector128 x1 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00));
- Vector128 x2 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x10));
- Vector128 x3 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x20));
- Vector128 x4 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x30));
- Vector128 x5;
-
- x1 = Sse2.Xor(x1, Sse2.ConvertScalarToVector128UInt32(crc).AsUInt64());
-
- // k1, k2
- Vector128 x0 = Sse2.LoadVector128(localK05PolyPtr + 0x0);
-
- localBufferPtr += 64;
- length -= 64;
-
- // Parallel fold blocks of 64, if any.
- while (length >= 64)
+ fixed (ulong* k05PolyPtr = K05Poly)
{
- x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
- Vector128 x6 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x00);
- Vector128 x7 = Pclmulqdq.CarrylessMultiply(x3, x0, 0x00);
- Vector128 x8 = Pclmulqdq.CarrylessMultiply(x4, x0, 0x00);
-
- x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
- x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x11);
- x3 = Pclmulqdq.CarrylessMultiply(x3, x0, 0x11);
- x4 = Pclmulqdq.CarrylessMultiply(x4, x0, 0x11);
+ byte* localBufferPtr = bufferPtr;
+ ulong* localK05PolyPtr = k05PolyPtr;
- Vector128 y5 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00));
- Vector128 y6 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x10));
- Vector128 y7 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x20));
- Vector128 y8 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x30));
+ // There's at least one block of 64.
+ Vector128 x1 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00));
+ Vector128 x2 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x10));
+ Vector128 x3 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x20));
+ Vector128 x4 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x30));
+ Vector128 x5;
- x1 = Sse2.Xor(x1, x5);
- x2 = Sse2.Xor(x2, x6);
- x3 = Sse2.Xor(x3, x7);
- x4 = Sse2.Xor(x4, x8);
+ x1 = Sse2.Xor(x1, Sse2.ConvertScalarToVector128UInt32(crc).AsUInt64());
- x1 = Sse2.Xor(x1, y5);
- x2 = Sse2.Xor(x2, y6);
- x3 = Sse2.Xor(x3, y7);
- x4 = Sse2.Xor(x4, y8);
+ // k1, k2
+ Vector128 x0 = Sse2.LoadVector128(localK05PolyPtr + 0x0);
localBufferPtr += 64;
length -= 64;
- }
-
- // Fold into 128-bits.
- // k3, k4
- x0 = Sse2.LoadVector128(k05PolyPtr + 0x2);
- x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
- x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
- x1 = Sse2.Xor(x1, x2);
- x1 = Sse2.Xor(x1, x5);
+ // Parallel fold blocks of 64, if any.
+ while (length >= 64)
+ {
+ x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
+ Vector128 x6 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x00);
+ Vector128 x7 = Pclmulqdq.CarrylessMultiply(x3, x0, 0x00);
+ Vector128 x8 = Pclmulqdq.CarrylessMultiply(x4, x0, 0x00);
+
+ x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
+ x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x11);
+ x3 = Pclmulqdq.CarrylessMultiply(x3, x0, 0x11);
+ x4 = Pclmulqdq.CarrylessMultiply(x4, x0, 0x11);
+
+ Vector128 y5 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00));
+ Vector128 y6 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x10));
+ Vector128 y7 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x20));
+ Vector128 y8 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x30));
+
+ x1 = Sse2.Xor(x1, x5);
+ x2 = Sse2.Xor(x2, x6);
+ x3 = Sse2.Xor(x3, x7);
+ x4 = Sse2.Xor(x4, x8);
+
+ x1 = Sse2.Xor(x1, y5);
+ x2 = Sse2.Xor(x2, y6);
+ x3 = Sse2.Xor(x3, y7);
+ x4 = Sse2.Xor(x4, y8);
+
+ localBufferPtr += 64;
+ length -= 64;
+ }
+
+ // Fold into 128-bits.
+ // k3, k4
+ x0 = Sse2.LoadVector128(k05PolyPtr + 0x2);
- x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
- x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
- x1 = Sse2.Xor(x1, x3);
- x1 = Sse2.Xor(x1, x5);
-
- x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
- x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
- x1 = Sse2.Xor(x1, x4);
- x1 = Sse2.Xor(x1, x5);
+ x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
+ x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
+ x1 = Sse2.Xor(x1, x2);
+ x1 = Sse2.Xor(x1, x5);
- // Single fold blocks of 16, if any.
- while (length >= 16)
- {
- x2 = Sse2.LoadVector128((ulong*)localBufferPtr);
+ x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
+ x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
+ x1 = Sse2.Xor(x1, x3);
+ x1 = Sse2.Xor(x1, x5);
x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
- x1 = Sse2.Xor(x1, x2);
+ x1 = Sse2.Xor(x1, x4);
x1 = Sse2.Xor(x1, x5);
- localBufferPtr += 16;
- length -= 16;
- }
+ // Single fold blocks of 16, if any.
+ while (length >= 16)
+ {
+ x2 = Sse2.LoadVector128((ulong*)localBufferPtr);
+
+ x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
+ x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
+ x1 = Sse2.Xor(x1, x2);
+ x1 = Sse2.Xor(x1, x5);
- // Fold 128 - bits to 64 - bits.
- x2 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x10);
- x3 = Vector128.Create(~0, 0, ~0, 0).AsUInt64(); // _mm_setr_epi32 on x86
- x1 = Sse2.ShiftRightLogical128BitLane(x1, 8);
- x1 = Sse2.Xor(x1, x2);
+ localBufferPtr += 16;
+ length -= 16;
+ }
- // k5, k0
- x0 = Sse2.LoadScalarVector128(localK05PolyPtr + 0x4);
+ // Fold 128 - bits to 64 - bits.
+ x2 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x10);
+ x3 = Vector128.Create(~0, 0, ~0, 0).AsUInt64(); // _mm_setr_epi32 on x86
+ x1 = Sse2.ShiftRightLogical128BitLane(x1, 8);
+ x1 = Sse2.Xor(x1, x2);
- x2 = Sse2.ShiftRightLogical128BitLane(x1, 4);
- x1 = Sse2.And(x1, x3);
- x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
- x1 = Sse2.Xor(x1, x2);
+ // k5, k0
+ x0 = Sse2.LoadScalarVector128(localK05PolyPtr + 0x4);
- // Barret reduce to 32-bits.
- // polynomial
- x0 = Sse2.LoadVector128(localK05PolyPtr + 0x6);
+ x2 = Sse2.ShiftRightLogical128BitLane(x1, 4);
+ x1 = Sse2.And(x1, x3);
+ x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
+ x1 = Sse2.Xor(x1, x2);
- x2 = Sse2.And(x1, x3);
- x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x10);
- x2 = Sse2.And(x2, x3);
- x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x00);
- x1 = Sse2.Xor(x1, x2);
+ // Barret reduce to 32-bits.
+ // polynomial
+ x0 = Sse2.LoadVector128(localK05PolyPtr + 0x6);
- crc = (uint)Sse41.Extract(x1.AsInt32(), 1);
- return buffer.Length - chunksize == 0 ? crc : CalculateScalar(crc, buffer.Slice(chunksize));
+ x2 = Sse2.And(x1, x3);
+ x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x10);
+ x2 = Sse2.And(x2, x3);
+ x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x00);
+ x1 = Sse2.Xor(x1, x2);
+
+ crc = (uint)Sse41.Extract(x1.AsInt32(), 1);
+ return buffer.Length - chunksize == 0 ? crc : CalculateScalar(crc, buffer.Slice(chunksize));
+ }
}
}
#endif
diff --git a/src/ImageSharp/Formats/Bmp/BmpConfigurationModule.cs b/src/ImageSharp/Formats/Bmp/BmpConfigurationModule.cs
index 5505cd5e6..0bec34ffb 100644
--- a/src/ImageSharp/Formats/Bmp/BmpConfigurationModule.cs
+++ b/src/ImageSharp/Formats/Bmp/BmpConfigurationModule.cs
@@ -16,4 +16,4 @@ namespace SixLabors.ImageSharp.Formats.Bmp
configuration.ImageFormatsManager.AddImageFormatDetector(new BmpImageFormatDetector());
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Bmp/BmpConstants.cs b/src/ImageSharp/Formats/Bmp/BmpConstants.cs
index d6c86e4db..0b9499eeb 100644
--- a/src/ImageSharp/Formats/Bmp/BmpConstants.cs
+++ b/src/ImageSharp/Formats/Bmp/BmpConstants.cs
@@ -56,4 +56,4 @@ namespace SixLabors.ImageSharp.Formats.Bmp
public const int Pointer = 0x5450;
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Bmp/BmpFormat.cs b/src/ImageSharp/Formats/Bmp/BmpFormat.cs
index 9e367c6da..d92a73104 100644
--- a/src/ImageSharp/Formats/Bmp/BmpFormat.cs
+++ b/src/ImageSharp/Formats/Bmp/BmpFormat.cs
@@ -34,4 +34,4 @@ namespace SixLabors.ImageSharp.Formats.Bmp
///
public BmpMetadata CreateDefaultFormatMetadata() => new BmpMetadata();
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Bmp/BmpMetadata.cs b/src/ImageSharp/Formats/Bmp/BmpMetadata.cs
index 50cf32fcb..b7b668a7a 100644
--- a/src/ImageSharp/Formats/Bmp/BmpMetadata.cs
+++ b/src/ImageSharp/Formats/Bmp/BmpMetadata.cs
@@ -40,4 +40,4 @@ namespace SixLabors.ImageSharp.Formats.Bmp
// TODO: Colors used once we support encoding palette bmps.
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Bmp/IBmpDecoderOptions.cs b/src/ImageSharp/Formats/Bmp/IBmpDecoderOptions.cs
index d359e9f1d..ff88d15a3 100644
--- a/src/ImageSharp/Formats/Bmp/IBmpDecoderOptions.cs
+++ b/src/ImageSharp/Formats/Bmp/IBmpDecoderOptions.cs
@@ -13,4 +13,4 @@ namespace SixLabors.ImageSharp.Formats.Bmp
///
RleSkippedPixelHandling RleSkippedPixelHandling { get; }
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Gif/GifConfigurationModule.cs b/src/ImageSharp/Formats/Gif/GifConfigurationModule.cs
index b08a3c38e..8f846f9d5 100644
--- a/src/ImageSharp/Formats/Gif/GifConfigurationModule.cs
+++ b/src/ImageSharp/Formats/Gif/GifConfigurationModule.cs
@@ -16,4 +16,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
configuration.ImageFormatsManager.AddImageFormatDetector(new GifImageFormatDetector());
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Gif/GifDecoderCore.cs b/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
index e59dad682..482a76153 100644
--- a/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
+++ b/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
@@ -8,7 +8,6 @@ using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
-
using SixLabors.ImageSharp.IO;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.Metadata;
diff --git a/src/ImageSharp/Formats/Gif/GifDisposalMethod.cs b/src/ImageSharp/Formats/Gif/GifDisposalMethod.cs
index b57491cf9..2211dfe4b 100644
--- a/src/ImageSharp/Formats/Gif/GifDisposalMethod.cs
+++ b/src/ImageSharp/Formats/Gif/GifDisposalMethod.cs
@@ -35,4 +35,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
///
RestoreToPrevious = 3
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Gif/GifFormat.cs b/src/ImageSharp/Formats/Gif/GifFormat.cs
index 4ff53a409..459f0068b 100644
--- a/src/ImageSharp/Formats/Gif/GifFormat.cs
+++ b/src/ImageSharp/Formats/Gif/GifFormat.cs
@@ -37,4 +37,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
///
public GifFrameMetadata CreateDefaultFormatFrameMetadata() => new GifFrameMetadata();
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Gif/GifImageFormatDetector.cs b/src/ImageSharp/Formats/Gif/GifImageFormatDetector.cs
index 3b3dd0bf1..736b9246d 100644
--- a/src/ImageSharp/Formats/Gif/GifImageFormatDetector.cs
+++ b/src/ImageSharp/Formats/Gif/GifImageFormatDetector.cs
@@ -30,4 +30,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
header[5] == 0x61; // a
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Gif/LzwEncoder.cs b/src/ImageSharp/Formats/Gif/LzwEncoder.cs
index 195a84a1d..e9fb7ab00 100644
--- a/src/ImageSharp/Formats/Gif/LzwEncoder.cs
+++ b/src/ImageSharp/Formats/Gif/LzwEncoder.cs
@@ -6,7 +6,6 @@ using System.Buffers;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
-
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Formats.Gif
diff --git a/src/ImageSharp/Formats/Gif/Sections/GifGraphicControlExtension.cs b/src/ImageSharp/Formats/Gif/Sections/GifGraphicControlExtension.cs
index 77b32f77d..ee5a43d80 100644
--- a/src/ImageSharp/Formats/Gif/Sections/GifGraphicControlExtension.cs
+++ b/src/ImageSharp/Formats/Gif/Sections/GifGraphicControlExtension.cs
@@ -103,4 +103,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
return value;
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Gif/Sections/GifImageDescriptor.cs b/src/ImageSharp/Formats/Gif/Sections/GifImageDescriptor.cs
index 68b048482..1eaebe11d 100644
--- a/src/ImageSharp/Formats/Gif/Sections/GifImageDescriptor.cs
+++ b/src/ImageSharp/Formats/Gif/Sections/GifImageDescriptor.cs
@@ -113,4 +113,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
return value;
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Gif/Sections/GifLogicalScreenDescriptor.cs b/src/ImageSharp/Formats/Gif/Sections/GifLogicalScreenDescriptor.cs
index 88c13d203..e3bc2e883 100644
--- a/src/ImageSharp/Formats/Gif/Sections/GifLogicalScreenDescriptor.cs
+++ b/src/ImageSharp/Formats/Gif/Sections/GifLogicalScreenDescriptor.cs
@@ -130,4 +130,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
return value;
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Gif/Sections/IGifExtension.cs b/src/ImageSharp/Formats/Gif/Sections/IGifExtension.cs
index bec188123..5a15a6dfa 100644
--- a/src/ImageSharp/Formats/Gif/Sections/IGifExtension.cs
+++ b/src/ImageSharp/Formats/Gif/Sections/IGifExtension.cs
@@ -22,4 +22,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
/// The number of bytes written to the buffer.
int WriteTo(Span buffer);
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/IImageFormat.cs b/src/ImageSharp/Formats/IImageFormat.cs
index 06b96caad..812984ba8 100644
--- a/src/ImageSharp/Formats/IImageFormat.cs
+++ b/src/ImageSharp/Formats/IImageFormat.cs
@@ -60,4 +60,4 @@ namespace SixLabors.ImageSharp.Formats
/// The .
TFormatFrameMetadata CreateDefaultFormatFrameMetadata();
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/AdobeMarker.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/AdobeMarker.cs
index 00ab48e25..b41d52aa4 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/AdobeMarker.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/AdobeMarker.cs
@@ -107,4 +107,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
this.ColorTransform);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs
index f9334de73..216c12735 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs
@@ -22,60 +22,39 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
- protected override void ConvertCoreVectorized(in ComponentValues values, Span result)
+ protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
#if SUPPORTS_RUNTIME_INTRINSICS
- ref Vector256 cBase =
+ ref Vector256 c0Base =
ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0));
- ref Vector256 mBase =
+ ref Vector256 c1Base =
ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1));
- ref Vector256 yBase =
+ ref Vector256 c2Base =
ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2));
- ref Vector256 kBase =
+ ref Vector256 c3Base =
ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3));
- ref Vector256 resultBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(result));
-
// Used for the color conversion
var scale = Vector256.Create(1 / this.MaximumValue);
- var one = Vector256.Create(1F);
-
- // Used for packing
- ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
- Vector256 vcontrol = Unsafe.As>(ref control);
- int n = result.Length / 8;
- for (int i = 0; i < n; i++)
+ nint n = values.Component0.Length / 8;
+ for (nint i = 0; i < n; i++)
{
- Vector256 k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol);
- Vector256 c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol);
- Vector256 m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol);
- Vector256 y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol);
+ ref Vector256 c = ref Unsafe.Add(ref c0Base, i);
+ ref Vector256 m = ref Unsafe.Add(ref c1Base, i);
+ ref Vector256 y = ref Unsafe.Add(ref c2Base, i);
+ Vector256 k = Unsafe.Add(ref c3Base, i);
k = Avx.Multiply(k, scale);
-
c = Avx.Multiply(Avx.Multiply(c, k), scale);
m = Avx.Multiply(Avx.Multiply(m, k), scale);
y = Avx.Multiply(Avx.Multiply(y, k), scale);
-
- Vector256 cmLo = Avx.UnpackLow(c, m);
- Vector256 yoLo = Avx.UnpackLow(y, one);
- Vector256 cmHi = Avx.UnpackHigh(c, m);
- Vector256 yoHi = Avx.UnpackHigh(y, one);
-
- ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4);
-
- destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00);
- Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10);
- Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00);
- Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10);
}
#endif
}
- protected override void ConvertCore(in ComponentValues values, Span result) =>
- FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
+ protected override void ConvertCoreInplace(in ComponentValues values) =>
+ FromCmykBasic.ConvertCoreInplace(values, this.MaximumValue);
}
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs
index 6cbd52ec3..b0ad50301 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs
@@ -15,38 +15,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
- public override void ConvertToRgba(in ComponentValues values, Span result)
- {
- ConvertCore(values, result, this.MaximumValue);
- }
+ public override void ConvertToRgbInplace(in ComponentValues values) =>
+ ConvertCoreInplace(values, this.MaximumValue);
- internal static void ConvertCore(in ComponentValues values, Span result, float maxValue)
+ internal static void ConvertCoreInplace(in ComponentValues values, float maxValue)
{
- ReadOnlySpan cVals = values.Component0;
- ReadOnlySpan mVals = values.Component1;
- ReadOnlySpan yVals = values.Component2;
- ReadOnlySpan kVals = values.Component3;
-
- var v = new Vector4(0, 0, 0, 1F);
-
- var maximum = 1 / maxValue;
- var scale = new Vector4(maximum, maximum, maximum, 1F);
+ Span c0 = values.Component0;
+ Span c1 = values.Component1;
+ Span c2 = values.Component2;
+ Span c3 = values.Component3;
- for (int i = 0; i < result.Length; i++)
+ float scale = 1 / maxValue;
+ for (int i = 0; i < c0.Length; i++)
{
- float c = cVals[i];
- float m = mVals[i];
- float y = yVals[i];
- float k = kVals[i] / maxValue;
-
- v.X = c * k;
- v.Y = m * k;
- v.Z = y * k;
- v.W = 1F;
-
- v *= scale;
-
- result[i] = v;
+ float c = c0[i];
+ float m = c1[i];
+ float y = c2[i];
+ float k = c3[i] / maxValue;
+
+ c0[i] = c * k * scale;
+ c1[i] = m * k * scale;
+ c2[i] = y * k * scale;
}
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs
index e75634b0f..0da4c9ec2 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs
@@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
- protected override void ConvertCoreVectorized(in ComponentValues values, Span result)
+ protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
ref Vector cBase =
ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0));
@@ -29,43 +29,25 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
ref Vector kBase =
ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3));
- ref Vector4Octet resultBase =
- ref Unsafe.As(ref MemoryMarshal.GetReference(result));
-
- Vector4Pair cc = default;
- Vector4Pair mm = default;
- Vector4Pair yy = default;
- ref Vector ccRefAsVector = ref Unsafe.As>(ref cc);
- ref Vector mmRefAsVector = ref Unsafe.As>(ref mm);
- ref Vector yyRefAsVector = ref Unsafe.As>(ref yy);
-
var scale = new Vector(1 / this.MaximumValue);
// Walking 8 elements at one step:
- int n = result.Length / 8;
- for (int i = 0; i < n; i++)
+ nint n = values.Component0.Length / 8;
+ for (nint i = 0; i < n; i++)
{
- Vector c = Unsafe.Add(ref cBase, i);
- Vector m = Unsafe.Add(ref mBase, i);
- Vector y = Unsafe.Add(ref yBase, i);
+ ref Vector c = ref Unsafe.Add(ref cBase, i);
+ ref Vector m = ref Unsafe.Add(ref mBase, i);
+ ref Vector y = ref Unsafe.Add(ref yBase, i);
Vector k = Unsafe.Add(ref kBase, i) * scale;
c = (c * k) * scale;
m = (m * k) * scale;
y = (y * k) * scale;
-
- ccRefAsVector = c;
- mmRefAsVector = m;
- yyRefAsVector = y;
-
- // Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
- ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
- destination.Pack(ref cc, ref mm, ref yy);
}
}
- protected override void ConvertCore(in ComponentValues values, Span result) =>
- FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
+ protected override void ConvertCoreInplace(in ComponentValues values) =>
+ FromCmykBasic.ConvertCoreInplace(values, this.MaximumValue);
}
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs
index 45846a6b5..eca6b6292 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs
@@ -22,42 +22,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
- protected override void ConvertCoreVectorized(in ComponentValues values, Span result)
+ protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
#if SUPPORTS_RUNTIME_INTRINSICS
- ref Vector256 gBase =
+ ref Vector256 c0Base =
ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0));
- ref Vector256 resultBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(result));
-
// Used for the color conversion
var scale = Vector256.Create(1 / this.MaximumValue);
- var one = Vector256.Create(1F);
-
- // Used for packing
- ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
- Vector256 vcontrol = Unsafe.As>(ref control);
- int n = result.Length / 8;
- for (int i = 0; i < n; i++)
+ nint n = values.Component0.Length / 8;
+ for (nint i = 0; i < n; i++)
{
- Vector256 g = Avx.Multiply(Unsafe.Add(ref gBase, i), scale);
-
- g = Avx2.PermuteVar8x32(g, vcontrol);
-
- ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4);
-
- destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000);
- Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Shuffle(g, g, 0b01_01_01_01), one, 0b1000_1000);
- Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Shuffle(g, g, 0b10_10_10_10), one, 0b1000_1000);
- Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Shuffle(g, g, 0b11_11_11_11), one, 0b1000_1000);
+ ref Vector256 c0 = ref Unsafe.Add(ref c0Base, i);
+ c0 = Avx.Multiply(c0, scale);
}
#endif
}
- protected override void ConvertCore(in ComponentValues values, Span result) =>
- FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue);
+ protected override void ConvertCoreInplace(in ComponentValues values) =>
+ FromGrayscaleBasic.ScaleValues(values.Component0, this.MaximumValue);
}
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs
index 0b7a220d9..76d57bf06 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs
@@ -17,25 +17,35 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
- public override void ConvertToRgba(in ComponentValues values, Span result)
- {
- ConvertCore(values, result, this.MaximumValue);
- }
+ public override void ConvertToRgbInplace(in ComponentValues values) =>
+ ScaleValues(values.Component0, this.MaximumValue);
- internal static void ConvertCore(in ComponentValues values, Span result, float maxValue)
+ internal static void ScaleValues(Span values, float maxValue)
{
- var maximum = 1 / maxValue;
- var scale = new Vector4(maximum, maximum, maximum, 1F);
+ Span vecValues = MemoryMarshal.Cast(values);
- ref float sBase = ref MemoryMarshal.GetReference(values.Component0);
- ref Vector4 dBase = ref MemoryMarshal.GetReference(result);
+ var scaleVector = new Vector4(1 / maxValue);
- for (int i = 0; i < result.Length; i++)
+ for (int i = 0; i < vecValues.Length; i++)
{
- var v = new Vector4(Unsafe.Add(ref sBase, i));
- v.W = 1f;
- v *= scale;
- Unsafe.Add(ref dBase, i) = v;
+ vecValues[i] *= scaleVector;
+ }
+
+ values = values.Slice(vecValues.Length * 4);
+ if (!values.IsEmpty)
+ {
+ float scaleValue = 1f / maxValue;
+ values[0] *= scaleValue;
+
+ if ((uint)values.Length > 1)
+ {
+ values[1] *= scaleValue;
+
+ if ((uint)values.Length > 2)
+ {
+ values[2] *= scaleValue;
+ }
+ }
}
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs
index 8f04c9152..557e4e417 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs
@@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
- protected override void ConvertCoreVectorized(in ComponentValues values, Span result)
+ protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256 rBase =
@@ -32,41 +32,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
ref Vector256 bBase =
ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2));
- ref Vector256 resultBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(result));
-
// Used for the color conversion
var scale = Vector256.Create(1 / this.MaximumValue);
- var one = Vector256.Create(1F);
-
- // Used for packing
- ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
- Vector256 vcontrol = Unsafe.As>(ref control);
-
- int n = result.Length / 8;
- for (int i = 0; i < n; i++)
+ nint n = values.Component0.Length / 8;
+ for (nint i = 0; i < n; i++)
{
- Vector256 r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale);
- Vector256 g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale);
- Vector256 b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale);
-
- Vector256 rgLo = Avx.UnpackLow(r, g);
- Vector256 boLo = Avx.UnpackLow(b, one);
- Vector256 rgHi = Avx.UnpackHigh(r, g);
- Vector256 boHi = Avx.UnpackHigh(b, one);
-
- ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4);
-
- destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00);
- Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10);
- Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00);
- Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10);
+ ref Vector256 r = ref Unsafe.Add(ref rBase, i);
+ ref Vector256 g = ref Unsafe.Add(ref gBase, i);
+ ref Vector256 b = ref Unsafe.Add(ref bBase, i);
+ r = Avx.Multiply(r, scale);
+ g = Avx.Multiply(g, scale);
+ b = Avx.Multiply(b, scale);
}
#endif
}
- protected override void ConvertCore(in ComponentValues values, Span result) =>
- FromRgbBasic.ConvertCore(values, result, this.MaximumValue);
+ protected override void ConvertCoreInplace(in ComponentValues values) =>
+ FromRgbBasic.ConvertCoreInplace(values, this.MaximumValue);
}
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs
index ddca3fe2f..1425e7b58 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs
@@ -3,6 +3,7 @@
using System;
using System.Numerics;
+using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
@@ -15,36 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
- public override void ConvertToRgba(in ComponentValues values, Span result)
+ public override void ConvertToRgbInplace(in ComponentValues values)
{
- ConvertCore(values, result, this.MaximumValue);
+ ConvertCoreInplace(values, this.MaximumValue);
}
- internal static void ConvertCore(in ComponentValues values, Span result, float maxValue)
+ internal static void ConvertCoreInplace(ComponentValues values, float maxValue)
{
- ReadOnlySpan rVals = values.Component0;
- ReadOnlySpan gVals = values.Component1;
- ReadOnlySpan bVals = values.Component2;
-
- var v = new Vector4(0, 0, 0, 1);
-
- var maximum = 1 / maxValue;
- var scale = new Vector4(maximum, maximum, maximum, 1F);
-
- for (int i = 0; i < result.Length; i++)
- {
- float r = rVals[i];
- float g = gVals[i];
- float b = bVals[i];
-
- v.X = r;
- v.Y = g;
- v.Z = b;
-
- v *= scale;
-
- result[i] = v;
- }
+ FromGrayscaleBasic.ScaleValues(values.Component0, maxValue);
+ FromGrayscaleBasic.ScaleValues(values.Component1, maxValue);
+ FromGrayscaleBasic.ScaleValues(values.Component2, maxValue);
}
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs
index 763064d1e..a00361d97 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs
@@ -18,50 +18,32 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
- protected override void ConvertCoreVectorized(in ComponentValues values, Span result)
+ protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
ref Vector rBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0));
+ ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector gBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1));
+ ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector bBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2));
-
- ref Vector4Octet resultBase =
- ref Unsafe.As(ref MemoryMarshal.GetReference(result));
-
- Vector4Pair rr = default;
- Vector4Pair gg = default;
- Vector4Pair bb = default;
- ref Vector rrRefAsVector = ref Unsafe.As>(ref rr);
- ref Vector ggRefAsVector = ref Unsafe.As>(ref gg);
- ref Vector bbRefAsVector = ref Unsafe.As>(ref bb);
+ ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2));
var scale = new Vector(1 / this.MaximumValue);
// Walking 8 elements at one step:
- int n = result.Length / 8;
- for (int i = 0; i < n; i++)
+ nint n = values.Component0.Length / 8;
+ for (nint i = 0; i < n; i++)
{
- Vector r = Unsafe.Add(ref rBase, i);
- Vector g = Unsafe.Add(ref gBase, i);
- Vector b = Unsafe.Add(ref bBase, i);
+ ref Vector r = ref Unsafe.Add(ref rBase, i);
+ ref Vector