diff --git a/src/ImageSharp/Common/Extensions/SimdUtils.cs b/src/ImageSharp/Common/Extensions/SimdUtils.cs
deleted file mode 100644
index 7b77fefcac..0000000000
--- a/src/ImageSharp/Common/Extensions/SimdUtils.cs
+++ /dev/null
@@ -1,232 +0,0 @@
-// Copyright (c) Six Labors and contributors.
-// Licensed under the Apache License, Version 2.0.
-
-using System;
-using System.Diagnostics;
-using System.Numerics;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-namespace SixLabors.ImageSharp
-{
- ///
- /// Various extension and utility methods for and utilizing SIMD capabilities
- ///
- internal static class SimdUtils
- {
- ///
- /// Gets a value indicating whether the code is being executed on AVX2 CPU where both float and integer registers are of size 256 byte.
- ///
- public static bool IsAvx2CompatibleArchitecture => Vector.Count == 8 && Vector.Count == 8;
-
- internal static void GuardAvx2(string operation)
- {
- if (!IsAvx2CompatibleArchitecture)
- {
- throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
- }
- }
-
- ///
- /// Transform all scalars in 'v' in a way that converting them to would have rounding semantics.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static Vector4 PseudoRound(this Vector4 v)
- {
- var sign = Vector4.Clamp(v, new Vector4(-1), new Vector4(1));
-
- return v + (sign * 0.5f);
- }
-
- ///
- /// Rounds all values in 'v' to the nearest integer following semantics.
- /// Source:
- ///
- /// https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110
- ///
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static Vector FastRound(this Vector x)
- {
- Vector magic0 = new Vector(int.MinValue); // 0x80000000
- Vector sgn0 = Vector.AsVectorSingle(magic0);
- Vector and0 = Vector.BitwiseAnd(sgn0, x);
- Vector or0 = Vector.BitwiseOr(and0, new Vector(8388608.0f));
- Vector add0 = Vector.Add(x, or0);
- Vector sub0 = Vector.Subtract(add0, or0);
- return sub0;
- }
-
- ///
- /// Convert 'source.Length' values normalized into [0..1] from 'source' into 'dest' buffer of values.
- /// The values gonna be scaled up into [0-255] and rounded.
- /// Based on:
- ///
- /// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions
- ///
- ///
- internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan source, Span dest)
- {
- GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
-
- DebugGuard.IsTrue((source.Length % Vector.Count) == 0, nameof(source), "source.Length should be divisable by Vector.Count!");
-
- if (source.Length == 0)
- {
- return;
- }
-
- ref Vector srcBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
- ref Octet.OfByte destBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
- int n = source.Length / 8;
-
- Vector magick = new Vector(32768.0f);
- Vector scale = new Vector(255f) / new Vector(256f);
-
- // need to copy to a temporary struct, because
- // SimdUtils.Octet.OfUInt32 temp = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x)
- // does not work. TODO: This might be a CoreClr bug, need to ask/report
- var temp = default(Octet.OfUInt32);
- ref Vector tempRef = ref Unsafe.As>(ref temp);
-
- for (int i = 0; i < n; i++)
- {
- // union { float f; uint32_t i; } u;
- // u.f = 32768.0f + x * (255.0f / 256.0f);
- // return (uint8_t)u.i;
- Vector x = Unsafe.Add(ref srcBase, i);
- x = (x * scale) + magick;
- tempRef = x;
-
- ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
- d.LoadFrom(ref temp);
- }
- }
-
- ///
- /// Same as but clamps overflown values before conversion.
- ///
- internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan source, Span dest)
- {
- GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
-
- DebugGuard.IsTrue((source.Length % Vector.Count) == 0, nameof(source), "source.Length should be divisable by Vector.Count!");
-
- if (source.Length == 0)
- {
- return;
- }
-
- ref Vector srcBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
- ref Octet.OfByte destBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
- int n = source.Length / 8;
-
- Vector magick = new Vector(32768.0f);
- Vector scale = new Vector(255f) / new Vector(256f);
-
- // need to copy to a temporary struct, because
- // SimdUtils.Octet.OfUInt32 temp = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x)
- // does not work. TODO: This might be a CoreClr bug, need to ask/report
- var temp = default(Octet.OfUInt32);
- ref Vector tempRef = ref Unsafe.As>(ref temp);
-
- for (int i = 0; i < n; i++)
- {
- // union { float f; uint32_t i; } u;
- // u.f = 32768.0f + x * (255.0f / 256.0f);
- // return (uint8_t)u.i;
- Vector x = Unsafe.Add(ref srcBase, i);
- x = Vector.Max(x, Vector.Zero);
- x = Vector.Min(x, Vector.One);
-
- x = (x * scale) + magick;
- tempRef = x;
-
- ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
- d.LoadFrom(ref temp);
- }
- }
-
- // TODO: Replace these with T4-d library level tuples!
- internal static class Octet
- {
- [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))]
- public struct OfUInt32
- {
- [FieldOffset(0 * sizeof(uint))]
- public uint V0;
-
- [FieldOffset(1 * sizeof(uint))]
- public uint V1;
-
- [FieldOffset(2 * sizeof(uint))]
- public uint V2;
-
- [FieldOffset(3 * sizeof(uint))]
- public uint V3;
-
- [FieldOffset(4 * sizeof(uint))]
- public uint V4;
-
- [FieldOffset(5 * sizeof(uint))]
- public uint V5;
-
- [FieldOffset(6 * sizeof(uint))]
- public uint V6;
-
- [FieldOffset(7 * sizeof(uint))]
- public uint V7;
-
- public override string ToString()
- {
- return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
- }
- }
-
- [StructLayout(LayoutKind.Explicit, Size = 8)]
- public struct OfByte
- {
- [FieldOffset(0)]
- public byte V0;
-
- [FieldOffset(1)]
- public byte V1;
-
- [FieldOffset(2)]
- public byte V2;
-
- [FieldOffset(3)]
- public byte V3;
-
- [FieldOffset(4)]
- public byte V4;
-
- [FieldOffset(5)]
- public byte V5;
-
- [FieldOffset(6)]
- public byte V6;
-
- [FieldOffset(7)]
- public byte V7;
-
- public override string ToString()
- {
- return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
- }
-
- public void LoadFrom(ref OfUInt32 i)
- {
- this.V0 = (byte)i.V0;
- this.V1 = (byte)i.V1;
- this.V2 = (byte)i.V2;
- this.V3 = (byte)i.V3;
- this.V4 = (byte)i.V4;
- this.V5 = (byte)i.V5;
- this.V6 = (byte)i.V6;
- this.V7 = (byte)i.V7;
- }
- }
- }
- }
-}
\ No newline at end of file
diff --git a/src/ImageSharp/Common/Helpers/ImageMaths.cs b/src/ImageSharp/Common/Helpers/ImageMaths.cs
index 35769d96a7..1dc7405677 100644
--- a/src/ImageSharp/Common/Helpers/ImageMaths.cs
+++ b/src/ImageSharp/Common/Helpers/ImageMaths.cs
@@ -14,6 +14,73 @@ namespace SixLabors.ImageSharp
///
internal static class ImageMaths
{
+ /// <summary>
+ /// Gets the luminance from the RGB components using the formula specified by ITU-R Recommendation BT.709.
+ /// </summary>
+ /// <remarks>
+ /// The three weights sum to 1, so the weighted sum stays within the <see cref="byte"/> range.
+ /// 0.5 is added before the truncating cast so the result is rounded to the nearest integer
+ /// instead of always being rounded down.
+ /// </remarks>
+ /// <param name="r">The red component.</param>
+ /// <param name="g">The green component.</param>
+ /// <param name="b">The blue component.</param>
+ /// <returns>The luminance as a <see cref="byte"/>.</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static byte Get8BitBT709Luminance(byte r, byte g, byte b) => (byte)((r * .2126F) + (g * .7152F) + (b * .0722F) + 0.5F);
+
+ /// <summary>
+ /// Gets the luminance from the RGB components using the formula specified by ITU-R Recommendation BT.709.
+ /// </summary>
+ /// <remarks>
+ /// The three weights sum to 1, so the weighted sum stays within the <see cref="ushort"/> range.
+ /// 0.5 is added before the truncating cast so the result is rounded to the nearest integer
+ /// instead of always being rounded down.
+ /// </remarks>
+ /// <param name="r">The red component.</param>
+ /// <param name="g">The green component.</param>
+ /// <param name="b">The blue component.</param>
+ /// <returns>The luminance as a <see cref="ushort"/>.</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static ushort Get16BitBT709Luminance(ushort r, ushort g, ushort b) => (ushort)((r * .2126F) + (g * .7152F) + (b * .0722F) + 0.5F);
+
+ /// <summary>
+ /// Scales a 16 bit component value down to its 8 bit equivalent.
+ /// </summary>
+ /// <param name="component">The 16 bit component value.</param>
+ /// <returns>The scaled value as a <see cref="byte"/>.</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static byte DownScaleFrom16BitTo8Bit(ushort component)
+ {
+     // The PNG specification defines the 8 bit equivalent of a 16 bit value V as
+     //
+     //     (V * 255) / 65535
+     //
+     // which reduces to round(V / 257), i.e. floor((V + 128.5) / 257).
+     //
+     // Writing V as the byte pair vhi.vlo and taking vhi as a first guess,
+     // the correction that still has to be applied is
+     //
+     //     error = floor(((V - vhi.vhi) + 128.5) / 257)
+     //           = floor(((vlo - vhi) + 128.5) / 257)
+     //
+     // An integer approximation using a signed shift is
+     //
+     //     error = (vlo - vhi + 128) >> 8;
+     //
+     // but it is off by one whenever (vlo - vhi) is 128 (128 inputs in total).
+     // The form that is exact for every 16 bit input is
+     //
+     //     error = (vlo - vhi + 128) * 65535 >> 24;
+     //
+     // The expression used here is an equivalent calculation that also has no errors:
+     //
+     //     (V * 255 + 32895) >> 16
+     int biased = (component * 255) + 32895;
+     return (byte)(biased >> 16);
+ }
+
+ /// <summary>
+ /// Scales an 8 bit component value up to its 16 bit equivalent.
+ /// </summary>
+ /// <param name="component">The 8 bit component value.</param>
+ /// <returns>The scaled value as a <see cref="ushort"/>.</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static ushort UpscaleFrom8BitTo16Bit(byte component)
+ {
+     // Repeating the byte in both halves of the result is the same as multiplying by 257 (0x0101).
+     int replicated = (component << 8) | component;
+     return (ushort)replicated;
+ }
+
///
/// Determine the Greatest CommonDivisor (GCD) of two numbers.
///
@@ -39,6 +106,28 @@ namespace SixLabors.ImageSharp
return (a / GreatestCommonDivisor(a, b)) * b;
}
+ /// <summary>
+ /// Calculates <paramref name="x"/> modulo 4 by masking the two lowest bits.
+ /// For negative input the result is still in [0..3], unlike the C# '%' operator.
+ /// </summary>
+ /// <param name="x">The value.</param>
+ /// <returns>The two lowest bits of <paramref name="x"/>.</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static int Modulo4(int x)
+ {
+     const int LowBitsMask = 0x3;
+     return x & LowBitsMask;
+ }
+
+ /// <summary>
+ /// Calculates <paramref name="x"/> modulo 8 by masking the three lowest bits.
+ /// For negative input the result is still in [0..7], unlike the C# '%' operator.
+ /// </summary>
+ /// <param name="x">The value.</param>
+ /// <returns>The three lowest bits of <paramref name="x"/>.</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static int Modulo8(int x)
+ {
+     const int LowBitsMask = 0x7;
+     return x & LowBitsMask;
+ }
+
+ /// <summary>
+ /// Fast (x mod m) calculator, with the restriction that
+ /// <paramref name="m"/> should be a power of 2.
+ /// </summary>
+ /// <param name="x">The value.</param>
+ /// <param name="m">The modulus; the result is only a true modulo when this is a power of 2.</param>
+ /// <returns><paramref name="x"/> masked with (<paramref name="m"/> - 1).</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static int ModuloP2(int x, int m)
+ {
+     // For a power of two m, (m - 1) has exactly the bits below m set.
+     int mask = m - 1;
+     return x & mask;
+ }
+
///
/// Returns the absolute value of a 32-bit signed integer. Uses bit shifting to speed up the operation.
///
@@ -46,7 +135,7 @@ namespace SixLabors.ImageSharp
/// A number that is greater than , but less than or equal to
///
/// The
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public static int FastAbs(int x)
{
int y = x >> 31;
@@ -58,7 +147,7 @@ namespace SixLabors.ImageSharp
///
/// A single-precision floating-point number
/// The number raised to the power of 2.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public static float Pow2(float x) => x * x;
///
@@ -66,7 +155,7 @@ namespace SixLabors.ImageSharp
///
/// A single-precision floating-point number
/// The number raised to the power of 3.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public static float Pow3(float x) => x * x * x;
///
@@ -77,7 +166,7 @@ namespace SixLabors.ImageSharp
///
/// The
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public static int GetBitsNeededForColorDepth(int colors) => Math.Max(1, (int)Math.Ceiling(Math.Log(colors, 2)));
///
@@ -85,7 +174,7 @@ namespace SixLabors.ImageSharp
///
/// The bit depth.
/// The
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public static int GetColorCountForBitDepth(int bitDepth) => 1 << bitDepth;
///
@@ -94,7 +183,7 @@ namespace SixLabors.ImageSharp
/// The x provided to G(x).
/// The spread of the blur.
/// The Gaussian G(x)
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public static float Gaussian(float x, float sigma)
{
const float Numerator = 1.0f;
@@ -117,7 +206,7 @@ namespace SixLabors.ImageSharp
///
/// The sine cardinal of .
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public static float SinC(float f)
{
if (MathF.Abs(f) > Constants.Epsilon)
@@ -140,7 +229,7 @@ namespace SixLabors.ImageSharp
///
/// The .
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public static float GetBcValue(float x, float b, float c)
{
if (x < 0F)
@@ -176,7 +265,7 @@ namespace SixLabors.ImageSharp
///
/// The bounding .
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public static Rectangle GetBoundingRectangle(Point topLeft, Point bottomRight) => new Rectangle(topLeft.X, topLeft.Y, bottomRight.X - topLeft.X, bottomRight.Y - topLeft.Y);
///
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs b/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
new file mode 100644
index 0000000000..0f1ce2ab6a
--- /dev/null
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
@@ -0,0 +1,215 @@
+// Copyright (c) Six Labors and contributors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.Tuples;
+
+// ReSharper disable MemberHidesStaticFromOuterClass
+namespace SixLabors.ImageSharp
+{
+ internal static partial class SimdUtils
+ {
+ /// <summary>
+ /// Implementation with 256bit / AVX2 intrinsics NOT depending on newer API-s (Vector.Widen etc.)
+ /// </summary>
+ public static class BasicIntrinsics256
+ {
+ /// <summary>
+ /// Gets a value indicating whether the methods of this class can be used.
+ /// The value is captured once from IsAvx2CompatibleArchitecture when the class is initialized.
+ /// </summary>
+ public static bool IsAvailable { get; } = IsAvx2CompatibleArchitecture;
+
+ /// <summary>
+ /// Converts as many leading elements as possible with BulkConvertByteToNormalizedFloat,
+ /// then slices both spans down so that only the unconverted remainder is left in them.
+ /// Leaves both spans untouched when IsAvailable is false.
+ /// </summary>
+ /// <param name="source">The source span; on return it refers to the elements that were not converted.</param>
+ /// <param name="dest">The destination span; on return it refers to the slots that were not written.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ internal static void BulkConvertByteToNormalizedFloatReduce(
+ ref ReadOnlySpan source,
+ ref Span dest)
+ {
+ DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+
+ if (!IsAvailable)
+ {
+ return;
+ }
+
+ // The bulk routine consumes 8 elements per step, so only a multiple of 8 can be forwarded to it.
+ int remainder = ImageMaths.Modulo8(source.Length);
+ int adjustedCount = source.Length - remainder;
+
+ if (adjustedCount > 0)
+ {
+ BulkConvertByteToNormalizedFloat(
+ source.Slice(0, adjustedCount),
+ dest.Slice(0, adjustedCount));
+
+ // Advance the caller's spans past the part that has just been converted.
+ source = source.Slice(adjustedCount);
+ dest = dest.Slice(adjustedCount);
+ }
+ }
+
+ /// <summary>
+ /// Converts as many leading elements as possible with BulkConvertNormalizedFloatToByteClampOverflows,
+ /// then slices both spans down so that only the unconverted remainder is left in them.
+ /// Leaves both spans untouched when IsAvailable is false.
+ /// </summary>
+ /// <param name="source">The source span; on return it refers to the elements that were not converted.</param>
+ /// <param name="dest">The destination span; on return it refers to the slots that were not written.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
+ ref ReadOnlySpan source,
+ ref Span dest)
+ {
+ DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+
+ if (!IsAvailable)
+ {
+ return;
+ }
+
+ // The bulk routine consumes 8 elements per step, so only a multiple of 8 can be forwarded to it.
+ int remainder = ImageMaths.Modulo8(source.Length);
+ int adjustedCount = source.Length - remainder;
+
+ if (adjustedCount > 0)
+ {
+ BulkConvertNormalizedFloatToByteClampOverflows(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount));
+
+ // Advance the caller's spans past the part that has just been converted.
+ source = source.Slice(adjustedCount);
+ dest = dest.Slice(adjustedCount);
+ }
+ }
+
+ /// <summary>
+ /// SIMD optimized conversion of bytes into floats normalized into [0..1].
+ /// Works only with span Length divisible by 8.
+ /// Implementation adapted from:
+ /// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions
+ /// http://stackoverflow.com/a/536278
+ /// </summary>
+ /// <param name="source">The source span of bytes.</param>
+ /// <param name="dest">The destination span of floats; it is also used as scratch space for the widened integers.</param>
+ internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan source, Span dest)
+ {
+ VerifyIsAvx2Compatible(nameof(BulkConvertByteToNormalizedFloat));
+ VerifySpanInput(source, dest, 8);
+
+ var bVec = new Vector(256.0f / 255.0f);
+ var magicFloat = new Vector(32768.0f);
+ var magicInt = new Vector(1191182336); // reinterpreted value of 32768.0f (0x47000000)
+ var mask = new Vector(255);
+
+ ref Octet.OfByte sourceBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
+ ref Octet.OfUInt32 destBaseAsWideOctet = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
+
+ // The same destination memory viewed as vectors, used by the second pass.
+ ref Vector destBaseAsFloat = ref Unsafe.As>(ref destBaseAsWideOctet);
+
+ int n = dest.Length / 8;
+
+ // Pass 1: widen every group of 8 source bytes into 8 32-bit integers stored in the destination buffer.
+ for (int i = 0; i < n; i++)
+ {
+ ref Octet.OfByte s = ref Unsafe.Add(ref sourceBase, i);
+ ref Octet.OfUInt32 d = ref Unsafe.Add(ref destBaseAsWideOctet, i);
+ d.LoadFrom(ref s);
+ }
+
+ // Pass 2: turn the integers into normalized floats in place.
+ for (int i = 0; i < n; i++)
+ {
+ ref Vector df = ref Unsafe.Add(ref destBaseAsFloat, i);
+
+ // Keep the low 8 bits and OR in the bit pattern of 32768.0f: the byte value b lands in the
+ // lowest mantissa bits, so the pattern reads as the float 32768 + b / 256.
+ var vi = Vector.AsVectorUInt32(df);
+ vi &= mask;
+ vi |= magicInt;
+
+ // (32768 + b / 256 - 32768) * (256 / 255) == b / 255
+ var vf = Vector.AsVectorSingle(vi);
+ vf = (vf - magicFloat) * bVec;
+
+ df = vf;
+ }
+ }
+
+ /// <summary>
+ /// Converts floats normalized into [0..1] to bytes scaled into [0-255], clamping out-of-range input first.
+ /// Variant which is faster on older runtimes. Works only with span Length divisible by 8.
+ /// </summary>
+ /// <param name="source">The source span of floats.</param>
+ /// <param name="dest">The destination span of bytes.</param>
+ internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan source, Span dest)
+ {
+ VerifyIsAvx2Compatible(nameof(BulkConvertNormalizedFloatToByteClampOverflows));
+ VerifySpanInput(source, dest, 8);
+
+ if (source.Length == 0)
+ {
+ return;
+ }
+
+ ref Vector srcBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Octet.OfByte destBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
+ int n = source.Length / 8;
+
+ // Adding 32768 moves the scaled value into [32768, 32769), where a float has a resolution of 1/256:
+ // the lowest 8 mantissa bits then hold the value scaled to [0-255] and rounded.
+ Vector magick = new Vector(32768.0f);
+ Vector scale = new Vector(255f) / new Vector(256f);
+
+ // need to copy to a temporary struct, because
+ // SimdUtils.Octet.OfUInt32 temp = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x)
+ // does not work. TODO: This might be a CoreClr bug, need to ask/report
+ var temp = default(Octet.OfUInt32);
+ ref Vector tempRef = ref Unsafe.As>(ref temp);
+
+ for (int i = 0; i < n; i++)
+ {
+ // union { float f; uint32_t i; } u;
+ // u.f = 32768.0f + x * (255.0f / 256.0f);
+ // return (uint8_t)u.i;
+ Vector x = Unsafe.Add(ref srcBase, i);
+
+ // Clamp into [0..1] so that the trick below cannot spill outside the low byte.
+ x = Vector.Max(x, Vector.Zero);
+ x = Vector.Min(x, Vector.One);
+
+ x = (x * scale) + magick;
+
+ // Writing through tempRef stores the raw float bits into 'temp'.
+ tempRef = x;
+
+ // LoadFrom keeps only the low byte of each 32-bit lane.
+ ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
+ d.LoadFrom(ref temp);
+ }
+ }
+
+ /// <summary>
+ /// Convert all float values normalized into [0..1] from 'source'
+ /// into 'dest' buffer of bytes. The values are scaled up into [0-255] and rounded.
+ /// This implementation is SIMD optimized and works only when span Length is divisible by 8.
+ /// No clamping is performed here, so the input is expected to be inside [0..1] already.
+ /// Based on:
+ /// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions
+ /// </summary>
+ /// <param name="source">The source span of floats.</param>
+ /// <param name="dest">The destination span of bytes.</param>
+ internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan source, Span dest)
+ {
+ VerifyIsAvx2Compatible(nameof(BulkConvertNormalizedFloatToByte));
+ VerifySpanInput(source, dest, 8);
+
+ if (source.Length == 0)
+ {
+ return;
+ }
+
+ ref Vector srcBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Octet.OfByte destBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
+ int n = source.Length / 8;
+
+ // Adding 32768 moves the scaled value into [32768, 32769), where a float has a resolution of 1/256:
+ // the lowest 8 mantissa bits then hold the value scaled to [0-255] and rounded.
+ Vector magick = new Vector(32768.0f);
+ Vector scale = new Vector(255f) / new Vector(256f);
+
+ // need to copy to a temporary struct, because
+ // SimdUtils.Octet.OfUInt32 temp = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x)
+ // does not work. TODO: This might be a CoreClr bug, need to ask/report
+ var temp = default(Octet.OfUInt32);
+ ref Vector tempRef = ref Unsafe.As>(ref temp);
+
+ for (int i = 0; i < n; i++)
+ {
+ // union { float f; uint32_t i; } u;
+ // u.f = 32768.0f + x * (255.0f / 256.0f);
+ // return (uint8_t)u.i;
+ Vector x = Unsafe.Add(ref srcBase, i);
+ x = (x * scale) + magick;
+
+ // Writing through tempRef stores the raw float bits into 'temp'.
+ tempRef = x;
+
+ // LoadFrom keeps only the low byte of each 32-bit lane.
+ ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
+ d.LoadFrom(ref temp);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
new file mode 100644
index 0000000000..e0d6187dca
--- /dev/null
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
@@ -0,0 +1,178 @@
+using System;
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// ReSharper disable MemberHidesStaticFromOuterClass
+namespace SixLabors.ImageSharp
+{
+ internal static partial class SimdUtils
+ {
+ /// <summary>
+ /// Implementation methods based on newer API-s (Vector.Widen, Vector.Narrow, Vector.ConvertTo*).
+ /// Accelerated only on RyuJIT having dotnet/coreclr#10662 merged (.NET Core 2.1+ .NET 4.7.2+)
+ /// See:
+ /// https://github.com/dotnet/coreclr/pull/10662
+ /// API Proposal:
+ /// https://github.com/dotnet/corefx/issues/15957
+ /// </summary>
+ public static class ExtendedIntrinsics
+ {
+ /// <summary>
+ /// Gets a value indicating whether the methods of this class can be used.
+ /// Always false unless compiled with NETCOREAPP2_1; in that case it reflects Vector.IsHardwareAccelerated.
+ /// </summary>
+ public static bool IsAvailable { get; } =
+#if NETCOREAPP2_1
+ // TODO: Also available in .NET 4.7.2, we need to add a build target!
+ Vector.IsHardwareAccelerated;
+#else
+ false;
+#endif
+
+ /// <summary>
+ /// Converts as many leading elements as possible with BulkConvertByteToNormalizedFloat,
+ /// then slices both spans down so that only the unconverted remainder is left in them.
+ /// Leaves both spans untouched when IsAvailable is false.
+ /// </summary>
+ /// <param name="source">The source span; on return it refers to the elements that were not converted.</param>
+ /// <param name="dest">The destination span; on return it refers to the slots that were not written.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ internal static void BulkConvertByteToNormalizedFloatReduce(
+ ref ReadOnlySpan source,
+ ref Span dest)
+ {
+ DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+
+ if (!IsAvailable)
+ {
+ return;
+ }
+
+ // Only whole vectors can be forwarded to the bulk routine.
+ // NOTE(review): ModuloP2 is only a true modulo when the vector element count is a power of 2 - presumably always the case; confirm.
+ int remainder = ImageMaths.ModuloP2(source.Length, Vector.Count);
+ int adjustedCount = source.Length - remainder;
+
+ if (adjustedCount > 0)
+ {
+ BulkConvertByteToNormalizedFloat(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount));
+
+ // Advance the caller's spans past the part that has just been converted.
+ source = source.Slice(adjustedCount);
+ dest = dest.Slice(adjustedCount);
+ }
+ }
+
+ /// <summary>
+ /// Converts as many leading elements as possible with BulkConvertNormalizedFloatToByteClampOverflows,
+ /// then slices both spans down so that only the unconverted remainder is left in them.
+ /// Leaves both spans untouched when IsAvailable is false.
+ /// </summary>
+ /// <param name="source">The source span; on return it refers to the elements that were not converted.</param>
+ /// <param name="dest">The destination span; on return it refers to the slots that were not written.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
+ ref ReadOnlySpan source,
+ ref Span dest)
+ {
+ DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+
+ if (!IsAvailable)
+ {
+ return;
+ }
+
+ // Only whole vectors can be forwarded to the bulk routine.
+ // NOTE(review): ModuloP2 is only a true modulo when the vector element count is a power of 2 - presumably always the case; confirm.
+ int remainder = ImageMaths.ModuloP2(source.Length, Vector.Count);
+ int adjustedCount = source.Length - remainder;
+
+ if (adjustedCount > 0)
+ {
+ BulkConvertNormalizedFloatToByteClampOverflows(
+ source.Slice(0, adjustedCount),
+ dest.Slice(0, adjustedCount));
+
+ // Advance the caller's spans past the part that has just been converted.
+ source = source.Slice(adjustedCount);
+ dest = dest.Slice(adjustedCount);
+ }
+ }
+
+ /// <summary>
+ /// Converts bytes into floats normalized into [0..1] using Vector.Widen and Vector.ConvertToSingle.
+ /// Variant which is faster on new RyuJIT runtime.
+ /// The span length has to be divisible by the vector element count checked in VerifySpanInput.
+ /// </summary>
+ /// <param name="source">The source span of bytes.</param>
+ /// <param name="dest">The destination span of floats.</param>
+ internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan source, Span dest)
+ {
+ VerifySpanInput(source, dest, Vector.Count);
+
+ int n = dest.Length / Vector.Count;
+
+ ref Vector sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
+
+ for (int i = 0; i < n; i++)
+ {
+ Vector b = Unsafe.Add(ref sourceBase, i);
+
+ // Two rounds of widening turn one source vector into four vectors of wider integers.
+ Vector.Widen(b, out Vector s0, out Vector s1);
+ Vector.Widen(s0, out Vector w0, out Vector w1);
+ Vector.Widen(s1, out Vector w2, out Vector w3);
+
+ // Each widened vector is converted to float and scaled by 1/255 (see ConvertToSingle).
+ Vector f0 = ConvertToSingle(w0);
+ Vector f1 = ConvertToSingle(w1);
+ Vector f2 = ConvertToSingle(w2);
+ Vector f3 = ConvertToSingle(w3);
+
+ // One source vector yields four consecutive destination vectors.
+ ref Vector d = ref Unsafe.Add(ref destBase, i * 4);
+ d = f0;
+ Unsafe.Add(ref d, 1) = f1;
+ Unsafe.Add(ref d, 2) = f2;
+ Unsafe.Add(ref d, 3) = f3;
+ }
+ }
+
+ /// <summary>
+ /// Converts floats normalized into [0..1] to bytes scaled into [0-255], clamping overflows,
+ /// using Vector.ConvertToInt32 and Vector.Narrow. Variant which is faster on new .NET runtime.
+ /// The span length has to be divisible by the vector element count checked in VerifySpanInput.
+ /// </summary>
+ /// <param name="source">The source span of floats.</param>
+ /// <param name="dest">The destination span of bytes.</param>
+ internal static void BulkConvertNormalizedFloatToByteClampOverflows(
+ ReadOnlySpan source,
+ Span dest)
+ {
+ VerifySpanInput(source, dest, Vector.Count);
+
+ int n = dest.Length / Vector.Count;
+
+ ref Vector sourceBase =
+ ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
+
+ for (int i = 0; i < n; i++)
+ {
+ // Four consecutive source vectors are consumed for every destination vector.
+ ref Vector s = ref Unsafe.Add(ref sourceBase, i * 4);
+
+ Vector f0 = s;
+ Vector f1 = Unsafe.Add(ref s, 1);
+ Vector f2 = Unsafe.Add(ref s, 2);
+ Vector f3 = Unsafe.Add(ref s, 3);
+
+ // Scale, round and clamp into [0..255] (see ConvertToUInt32).
+ Vector w0 = ConvertToUInt32(f0);
+ Vector w1 = ConvertToUInt32(f1);
+ Vector w2 = ConvertToUInt32(f2);
+ Vector w3 = ConvertToUInt32(f3);
+
+ // Two rounds of narrowing pack the four integer vectors into a single vector.
+ Vector u0 = Vector.Narrow(w0, w1);
+ Vector u1 = Vector.Narrow(w2, w3);
+
+ Vector b = Vector.Narrow(u0, u1);
+
+ Unsafe.Add(ref destBase, i) = b;
+ }
+ }
+
+ /// <summary>
+ /// Scales the values of 'vf' by 255, adds 0.5 for rounding, clamps the result into [0..255]
+ /// and converts it to integers.
+ /// </summary>
+ /// <param name="vf">The vector of normalized floats.</param>
+ /// <returns>The converted integer vector.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector ConvertToUInt32(Vector vf)
+ {
+ Vector maxBytes = new Vector(255f);
+ vf *= maxBytes;
+
+ // Adding 0.5 before the integer conversion rounds the (non-negative, clamped) values to nearest.
+ vf += new Vector(0.5f);
+ vf = Vector.Min(Vector.Max(vf, Vector.Zero), maxBytes);
+ Vector vi = Vector.ConvertToInt32(vf);
+
+ // The values are within [0..255] here, so reinterpreting the bits as unsigned does not change them.
+ return Vector.AsVectorUInt32(vi);
+ }
+
+ /// <summary>
+ /// Converts the integer values of 'u' to floats and scales them by 1/255.
+ /// </summary>
+ /// <param name="u">The vector of integers.</param>
+ /// <returns>The scaled float vector.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector ConvertToSingle(Vector u)
+ {
+ // Reinterpret as signed integers first, because the conversion used below takes the Int32 view.
+ Vector vi = Vector.AsVectorInt32(u);
+ Vector v = Vector.ConvertToSingle(vi);
+ v *= new Vector(1f / 255f);
+ return v;
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs b/src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs
new file mode 100644
index 0000000000..565ea08f5d
--- /dev/null
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs
@@ -0,0 +1,151 @@
+// Copyright (c) Six Labors and contributors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// ReSharper disable MemberHidesStaticFromOuterClass
+namespace SixLabors.ImageSharp
+{
+ internal static partial class SimdUtils
+ {
+ /// <summary>
+ /// Fallback implementation based on <see cref="Vector4"/> (128bit).
+ /// For <see cref="Vector4"/>, efficient software fallback implementations are present,
+ /// and we hope that even mono's JIT is able to emit SIMD instructions for that type :P
+ /// </summary>
+ public static class FallbackIntrinsics128
+ {
+ /// <summary>
+ /// Converts as many leading elements as possible with BulkConvertByteToNormalizedFloat,
+ /// then slices both spans down so that only the unconverted remainder is left in them.
+ /// </summary>
+ /// <param name="source">The source span; on return it refers to the elements that were not converted.</param>
+ /// <param name="dest">The destination span; on return it refers to the slots that were not written.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ internal static void BulkConvertByteToNormalizedFloatReduce(
+ ref ReadOnlySpan source,
+ ref Span dest)
+ {
+ DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+
+ // The bulk routine consumes 4 elements per step, so only a multiple of 4 can be forwarded to it.
+ int remainder = ImageMaths.Modulo4(source.Length);
+ int adjustedCount = source.Length - remainder;
+
+ if (adjustedCount > 0)
+ {
+ BulkConvertByteToNormalizedFloat(
+ source.Slice(0, adjustedCount),
+ dest.Slice(0, adjustedCount));
+
+ // Advance the caller's spans past the part that has just been converted.
+ source = source.Slice(adjustedCount);
+ dest = dest.Slice(adjustedCount);
+ }
+ }
+
+ /// <summary>
+ /// Converts as many leading elements as possible with BulkConvertNormalizedFloatToByteClampOverflows,
+ /// then slices both spans down so that only the unconverted remainder is left in them.
+ /// </summary>
+ /// <param name="source">The source span; on return it refers to the elements that were not converted.</param>
+ /// <param name="dest">The destination span; on return it refers to the slots that were not written.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
+ ref ReadOnlySpan source,
+ ref Span dest)
+ {
+ DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+
+ // The bulk routine consumes 4 elements per step, so only a multiple of 4 can be forwarded to it.
+ int remainder = ImageMaths.Modulo4(source.Length);
+ int adjustedCount = source.Length - remainder;
+
+ if (adjustedCount > 0)
+ {
+ BulkConvertNormalizedFloatToByteClampOverflows(
+ source.Slice(0, adjustedCount),
+ dest.Slice(0, adjustedCount));
+
+ // Advance the caller's spans past the part that has just been converted.
+ source = source.Slice(adjustedCount);
+ dest = dest.Slice(adjustedCount);
+ }
+ }
+
+ /// <summary>
+ /// Converts bytes into floats normalized into [0..1], four elements at a time, using <see cref="Vector4"/>.
+ /// Works only with span Length divisible by 4.
+ /// </summary>
+ /// <param name="source">The source span of bytes.</param>
+ /// <param name="dest">The destination span of floats.</param>
+ [MethodImpl(InliningOptions.ColdPath)]
+ internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan source, Span dest)
+ {
+ VerifySpanInput(source, dest, 4);
+
+ int count = dest.Length / 4;
+ if (count == 0)
+ {
+ return;
+ }
+
+ // View the source as groups of 4 bytes and the destination as Vector4-s.
+ ref ByteVector4 sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
+ ref Vector4 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
+
+ const float Scale = 1f / 255f;
+ Vector4 d = default;
+
+ for (int i = 0; i < count; i++)
+ {
+ ref ByteVector4 s = ref Unsafe.Add(ref sBase, i);
+
+ // Every component is overwritten before the multiplication, so reusing 'd' across iterations is safe.
+ d.X = s.X;
+ d.Y = s.Y;
+ d.Z = s.Z;
+ d.W = s.W;
+ d *= Scale;
+ Unsafe.Add(ref dBase, i) = d;
+ }
+ }
+
+ /// <summary>
+ /// Converts floats normalized into [0..1] to bytes scaled into [0-255], four elements at a time,
+ /// using <see cref="Vector4"/>. Values are rounded and overflows are clamped.
+ /// Works only with span Length divisible by 4.
+ /// </summary>
+ /// <param name="source">The source span of floats.</param>
+ /// <param name="dest">The destination span of bytes.</param>
+ [MethodImpl(InliningOptions.ColdPath)]
+ internal static void BulkConvertNormalizedFloatToByteClampOverflows(
+ ReadOnlySpan source,
+ Span dest)
+ {
+ VerifySpanInput(source, dest, 4);
+
+ int count = source.Length / 4;
+ if (count == 0)
+ {
+ return;
+ }
+
+ // View the source as Vector4-s and the destination as groups of 4 bytes.
+ ref Vector4 sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
+ ref ByteVector4 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
+
+ var half = new Vector4(0.5f);
+ var maxBytes = new Vector4(255f);
+
+ for (int i = 0; i < count; i++)
+ {
+ Vector4 s = Unsafe.Add(ref sBase, i);
+
+ // Scale to [0-255]; adding 0.5 makes the truncating byte casts below round to nearest.
+ s *= maxBytes;
+ s += half;
+
+ // I'm not sure if Vector4.Clamp() is properly implemented with intrinsics.
+ s = Vector4.Max(Vector4.Zero, s);
+ s = Vector4.Min(maxBytes, s);
+
+ ref ByteVector4 d = ref Unsafe.Add(ref dBase, i);
+ d.X = (byte)s.X;
+ d.Y = (byte)s.Y;
+ d.Z = (byte)s.Z;
+ d.W = (byte)s.W;
+ }
+ }
+
+ /// <summary>
+ /// Four consecutive bytes, laid out sequentially. The byte spans are reinterpreted as this type
+ /// so that a group of 4 elements can be accessed per step; the field order must not change.
+ /// </summary>
+ [StructLayout(LayoutKind.Sequential)]
+ private struct ByteVector4
+ {
+ public byte X;
+ public byte Y;
+ public byte Z;
+ public byte W;
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs
new file mode 100644
index 0000000000..737e620061
--- /dev/null
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs
@@ -0,0 +1,185 @@
+// Copyright (c) Six Labors and contributors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+using SixLabors.ImageSharp.PixelFormats;
+using SixLabors.ImageSharp.Tuples;
+
+namespace SixLabors.ImageSharp
+{
+ /// <summary>
+ /// Various extension and utility methods for <see cref="Vector4"/> and <see cref="Vector{T}"/> utilizing SIMD capabilities
+ /// </summary>
+ internal static partial class SimdUtils
+ {
+ /// <summary>
+ /// Gets a value indicating whether the code is being executed on AVX2 CPU where both float and integer registers are of size 256 bit.
+ /// The check requires hardware acceleration and a vector element count of 8 in both tested vector types.
+ /// </summary>
+ public static bool IsAvx2CompatibleArchitecture { get; } =
+ Vector.IsHardwareAccelerated && Vector.Count == 8 && Vector.Count == 8;
+
+ /// <summary>
+ /// Offsets every scalar of 'v' so that a subsequent truncating conversion to an integer behaves like rounding:
+ /// half of the value clamped into [-1..1] is added, i.e. +0.5 for values of 1 and above, -0.5 for values of -1 and below.
+ /// </summary>
+ /// <param name="v">The vector</param>
+ /// <returns>The offset vector.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal static Vector4 PseudoRound(this Vector4 v)
+ {
+     Vector4 lower = new Vector4(-1);
+     Vector4 upper = new Vector4(1);
+
+     // The clamped value carries the sign of each component (and shrinks the offset for |v| < 1).
+     Vector4 halfStep = Vector4.Clamp(v, lower, upper) * 0.5f;
+     return v + halfStep;
+ }
+
+ /// <summary>
+ /// Rounds all values in 'v' to the nearest integer by adding and subtracting a sign-matched 2^23.
+ /// NOTE(review): the result follows the rounding mode of the floating point additions - presumably round-half-to-even; confirm.
+ /// Source:
+ /// https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110
+ /// </summary>
+ /// <param name="v">The vector</param>
+ /// <returns>The vector of rounded values.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal static Vector FastRound(this Vector v)
+ {
+ Vector magic0 = new Vector(int.MinValue); // 0x80000000
+ Vector sgn0 = Vector.AsVectorSingle(magic0);
+
+ // Isolate the sign bit of every element ...
+ Vector and0 = Vector.BitwiseAnd(sgn0, v);
+
+ // ... and attach it to 8388608 (2^23), the magnitude from which a float has no fractional bits left.
+ Vector or0 = Vector.BitwiseOr(and0, new Vector(8388608.0f));
+
+ // Adding the signed 2^23 drops the fraction of v; subtracting it again leaves the rounded value.
+ Vector add0 = Vector.Add(v, or0);
+ Vector sub0 = Vector.Subtract(add0, or0);
+ return sub0;
+ }
+
+ /// <summary>
+ /// Converts all input bytes to floats normalized into [0..1].
+ /// <paramref name="source"/> should be of the same size as <paramref name="dest"/>,
+ /// but there are no restrictions on the span's length.
+ /// </summary>
+ /// <param name="source">The source span of bytes</param>
+ /// <param name="dest">The destination span of floats</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan source, Span dest)
+ {
+ DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+
+ // Each *Reduce call converts the leading part it can handle and shrinks both spans to what is left.
+#if NETCOREAPP2_1
+ ExtendedIntrinsics.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
+#else
+ BasicIntrinsics256.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
+#endif
+
+ // Handles what the wider implementation left over (or everything, if that one was unavailable), in groups of 4.
+ FallbackIntrinsics128.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
+
+ // Deal with the remainder:
+ if (source.Length > 0)
+ {
+ ConverByteToNormalizedFloatRemainder(source, dest);
+ }
+ }
+
+ /// <summary>
+ /// Convert all float values normalized into [0..1] from 'source' into 'dest' buffer of bytes.
+ /// The values are scaled up into [0-255] and rounded, overflows are clamped.
+ /// <paramref name="source"/> should be of the same size as <paramref name="dest"/>,
+ /// but there are no restrictions on the span's length.
+ /// </summary>
+ /// <param name="source">The source span of floats</param>
+ /// <param name="dest">The destination span of bytes</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan source, Span dest)
+ {
+ DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+
+ // Each *Reduce call converts the leading part it can handle and shrinks both spans to what is left.
+#if NETCOREAPP2_1
+ ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
+#else
+ BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
+#endif
+
+ // Handles what the wider implementation left over (or everything, if that one was unavailable), in groups of 4.
+ FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
+
+ // Deal with the remainder:
+ if (source.Length > 0)
+ {
+ ConvertNormalizedFloatToByteRemainder(source, dest);
+ }
+ }
+
+ /// <summary>
+ /// Converts the last 1 to 3 bytes of a bulk conversion into floats normalized into [0..1].
+ /// Spans of any other length are left untouched.
+ /// </summary>
+ /// <param name="source">The remaining source bytes.</param>
+ /// <param name="dest">The remaining destination floats.</param>
+ // NOTE(review): "Conver" in the method name looks like a typo for "Convert"; a rename also has to update the call site.
+ [MethodImpl(InliningOptions.ColdPath)]
+ private static void ConverByteToNormalizedFloatRemainder(ReadOnlySpan source, Span dest)
+ {
+ ref byte sBase = ref MemoryMarshal.GetReference(source);
+ ref float dBase = ref MemoryMarshal.GetReference(dest);
+
+ // There are at most 3 elements at this point, having a for loop is overkill.
+ // Let's minimize the no. of instructions!
+ // The cases chain downwards through 'goto case', so entering at N converts elements N-1 .. 0.
+ switch (source.Length)
+ {
+ case 3:
+ Unsafe.Add(ref dBase, 2) = Unsafe.Add(ref sBase, 2) / 255f;
+ goto case 2;
+ case 2:
+ Unsafe.Add(ref dBase, 1) = Unsafe.Add(ref sBase, 1) / 255f;
+ goto case 1;
+ case 1:
+ dBase = sBase / 255f;
+ break;
+ }
+ }
+
+ /// <summary>
+ /// Converts the last 1 to 3 normalized floats of a bulk conversion into bytes using ConvertToByte.
+ /// Spans of any other length are left untouched.
+ /// </summary>
+ /// <param name="source">The remaining source floats.</param>
+ /// <param name="dest">The remaining destination bytes.</param>
+ [MethodImpl(InliningOptions.ColdPath)]
+ private static void ConvertNormalizedFloatToByteRemainder(ReadOnlySpan source, Span dest)
+ {
+ ref float sBase = ref MemoryMarshal.GetReference(source);
+ ref byte dBase = ref MemoryMarshal.GetReference(dest);
+
+ // The cases chain downwards through 'goto case', so entering at N converts elements N-1 .. 0.
+ switch (source.Length)
+ {
+ case 3:
+ Unsafe.Add(ref dBase, 2) = ConvertToByte(Unsafe.Add(ref sBase, 2));
+ goto case 2;
+ case 2:
+ Unsafe.Add(ref dBase, 1) = ConvertToByte(Unsafe.Add(ref sBase, 1));
+ goto case 1;
+ case 1:
+ dBase = ConvertToByte(sBase);
+ break;
+ }
+ }
+
+ /// <summary>
+ /// Scales a normalized float by 255, adds 0.5 for rounding, clamps the result into [0..255] and truncates it to a byte.
+ /// </summary>
+ /// <param name="f">The normalized value.</param>
+ /// <returns>The converted <see cref="byte"/>.</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private static byte ConvertToByte(float f)
+ {
+     float scaled = (f * 255f) + 0.5f;
+     var clamped = ComparableExtensions.Clamp(scaled, 0, 255f);
+     return (byte)clamped;
+ }
+
+ /// <summary>
+ /// Debug-only guard: throws when the current machine is not reported as AVX2 compatible.
+ /// Calls to this method are compiled only when the DEBUG symbol is defined.
+ /// </summary>
+ /// <param name="operation">The name of the operation, used in the exception message.</param>
+ /// <exception cref="NotSupportedException">IsAvx2CompatibleArchitecture is false.</exception>
+ [Conditional("DEBUG")]
+ private static void VerifyIsAvx2Compatible(string operation)
+ {
+     if (IsAvx2CompatibleArchitecture)
+     {
+         return;
+     }
+
+     throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
+ }
+
+ /// <summary>
+ /// Debug-only input check: both spans must have the same length, and that length must be
+ /// divisible by <paramref name="shouldBeDivisibleBy"/>.
+ /// Calls to this method are compiled only when the DEBUG symbol is defined.
+ /// </summary>
+ /// <param name="source">The source span.</param>
+ /// <param name="dest">The destination span.</param>
+ /// <param name="shouldBeDivisibleBy">The required divisor of the length; ModuloP2 expects it to be a power of 2.</param>
+ // NOTE(review): as shown here this signature is identical to the other VerifySpanInput overload;
+ // the two presumably differ in the span element types - confirm.
+ [Conditional("DEBUG")]
+ private static void VerifySpanInput(ReadOnlySpan source, Span dest, int shouldBeDivisibleBy)
+ {
+ DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+ DebugGuard.IsTrue(
+ ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0,
+ nameof(source),
+ $"length should be divisable by {shouldBeDivisibleBy}!");
+ }
+
+ /// <summary>
+ /// Debug-only input check: both spans must have the same length, and that length must be
+ /// divisible by <paramref name="shouldBeDivisibleBy"/>.
+ /// Calls to this method are compiled only when the DEBUG symbol is defined.
+ /// </summary>
+ /// <param name="source">The source span.</param>
+ /// <param name="dest">The destination span.</param>
+ /// <param name="shouldBeDivisibleBy">The required divisor of the length; ModuloP2 expects it to be a power of 2.</param>
+ // NOTE(review): as shown here this signature is identical to the other VerifySpanInput overload;
+ // the two presumably differ in the span element types - confirm.
+ [Conditional("DEBUG")]
+ private static void VerifySpanInput(ReadOnlySpan source, Span dest, int shouldBeDivisibleBy)
+ {
+ DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+ DebugGuard.IsTrue(
+ ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0,
+ nameof(source),
+ $"length should be divisable by {shouldBeDivisibleBy}!");
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ImageSharp/Common/Tuples/Octet.cs b/src/ImageSharp/Common/Tuples/Octet.cs
new file mode 100644
index 0000000000..539b74e324
--- /dev/null
+++ b/src/ImageSharp/Common/Tuples/Octet.cs
@@ -0,0 +1,109 @@
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp.Tuples
+{
+ /// <summary>
+ /// Contains 8 element value tuples of various types.
+ /// </summary>
+ internal static class Octet
+ {
+ /// <summary>
+ /// Value tuple of <see cref="uint"/>-s.
+ /// Explicit layout: eight fields at consecutive 4-byte offsets, 32 bytes in total.
+ /// </summary>
+ [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))]
+ public struct OfUInt32
+ {
+ [FieldOffset(0 * sizeof(uint))]
+ public uint V0;
+
+ [FieldOffset(1 * sizeof(uint))]
+ public uint V1;
+
+ [FieldOffset(2 * sizeof(uint))]
+ public uint V2;
+
+ [FieldOffset(3 * sizeof(uint))]
+ public uint V3;
+
+ [FieldOffset(4 * sizeof(uint))]
+ public uint V4;
+
+ [FieldOffset(5 * sizeof(uint))]
+ public uint V5;
+
+ [FieldOffset(6 * sizeof(uint))]
+ public uint V6;
+
+ [FieldOffset(7 * sizeof(uint))]
+ public uint V7;
+
+ /// <summary>
+ /// Formats the tuple as "Octet.OfUInt32(V0,V1,...,V7)".
+ /// </summary>
+ public override string ToString()
+ {
+ return $"{nameof(Octet)}.{nameof(OfUInt32)}({this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7})";
+ }
+
+ /// <summary>
+ /// Copies the eight <see cref="byte"/> values of 'src' into the matching fields of this instance,
+ /// widening each one to <see cref="uint"/>.
+ /// </summary>
+ /// <param name="src">The byte tuple to read from; it is not modified.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void LoadFrom(ref OfByte src)
+ {
+ this.V0 = src.V0;
+ this.V1 = src.V1;
+ this.V2 = src.V2;
+ this.V3 = src.V3;
+ this.V4 = src.V4;
+ this.V5 = src.V5;
+ this.V6 = src.V6;
+ this.V7 = src.V7;
+ }
+ }
+
+ /// <summary>
+ /// Value tuple of <see cref="byte"/>-s.
+ /// Explicit layout: eight fields at consecutive 1-byte offsets, 8 bytes in total.
+ /// </summary>
+ [StructLayout(LayoutKind.Explicit, Size = 8)]
+ public struct OfByte
+ {
+ [FieldOffset(0)]
+ public byte V0;
+
+ [FieldOffset(1)]
+ public byte V1;
+
+ [FieldOffset(2)]
+ public byte V2;
+
+ [FieldOffset(3)]
+ public byte V3;
+
+ [FieldOffset(4)]
+ public byte V4;
+
+ [FieldOffset(5)]
+ public byte V5;
+
+ [FieldOffset(6)]
+ public byte V6;
+
+ [FieldOffset(7)]
+ public byte V7;
+
+ /// <summary>
+ /// Formats the tuple as "Octet.OfByte(V0,V1,...,V7)".
+ /// </summary>
+ public override string ToString()
+ {
+ return $"{nameof(Octet)}.{nameof(OfByte)}({this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7})";
+ }
+
+ /// <summary>
+ /// Copies the eight <see cref="uint"/> values of 'src' into the matching fields of this instance,
+ /// narrowing each one with an explicit <see cref="byte"/> cast.
+ /// </summary>
+ /// <param name="src">The uint tuple to read from; it is not modified.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void LoadFrom(ref OfUInt32 src)
+ {
+ // NOTE(review): no range check here - callers presumably supply values in 0..255; confirm.
+ this.V0 = (byte)src.V0;
+ this.V1 = (byte)src.V1;
+ this.V2 = (byte)src.V2;
+ this.V3 = (byte)src.V3;
+ this.V4 = (byte)src.V4;
+ this.V5 = (byte)src.V5;
+ this.V6 = (byte)src.V6;
+ this.V7 = (byte)src.V7;
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ImageSharp/Common/Tuples/Vector4Pair.cs b/src/ImageSharp/Common/Tuples/Vector4Pair.cs
index 309d5e2e56..cae283d628 100644
--- a/src/ImageSharp/Common/Tuples/Vector4Pair.cs
+++ b/src/ImageSharp/Common/Tuples/Vector4Pair.cs
@@ -2,11 +2,12 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
-namespace SixLabors.ImageSharp.Common.Tuples
+namespace SixLabors.ImageSharp.Tuples
{
///
/// Its faster to process multiple Vector4-s together, so let's pair them!
/// On AVX2 this pair should be convertible to of !
+ /// TODO: Investigate defining this as union with an Octet.OfSingle type.
///
[StructLayout(LayoutKind.Sequential)]
internal struct Vector4Pair
@@ -15,8 +16,6 @@ namespace SixLabors.ImageSharp.Common.Tuples
public Vector4 B;
- private static readonly Vector4 Scale = new Vector4(1 / 255f);
-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void MultiplyInplace(float value)
{
@@ -52,8 +51,9 @@ namespace SixLabors.ImageSharp.Common.Tuples
b = b.FastRound();
// Downscale by 1/255
- this.A *= Scale;
- this.B *= Scale;
+ var scale = new Vector4(1 / 255f);
+ this.A *= scale;
+ this.B *= scale;
}
///
@@ -74,7 +74,7 @@ namespace SixLabors.ImageSharp.Common.Tuples
public override string ToString()
{
- return $"{this.A}, {this.B}";
+ return $"{nameof(Vector4Pair)}({this.A}, {this.B})";
}
}
}
\ No newline at end of file
diff --git a/src/ImageSharp/Formats/Bmp/BmpDecoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpDecoderCore.cs
index 71852acddd..dabab651d0 100644
--- a/src/ImageSharp/Formats/Bmp/BmpDecoderCore.cs
+++ b/src/ImageSharp/Formats/Bmp/BmpDecoderCore.cs
@@ -219,8 +219,6 @@ namespace SixLabors.ImageSharp.Formats.Bmp
where TPixel : struct, IPixel
{
TPixel color = default;
- var rgba = new Rgba32(0, 0, 0, 255);
-
using (Buffer2D buffer = this.memoryAllocator.Allocate2D(width, height, AllocationOptions.Clean))
{
this.UncompressRle8(width, buffer.GetSpan());
@@ -233,8 +231,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp
for (int x = 0; x < width; x++)
{
- rgba.Bgr = Unsafe.As(ref colors[bufferRow[x] * 4]);
- color.PackFromRgba32(rgba);
+ color.PackFromBgr24(Unsafe.As(ref colors[bufferRow[x] * 4]));
pixelRow[x] = color;
}
}
@@ -352,8 +349,6 @@ namespace SixLabors.ImageSharp.Formats.Bmp
using (IManagedByteBuffer row = this.memoryAllocator.AllocateManagedByteBuffer(arrayWidth + padding, AllocationOptions.Clean))
{
TPixel color = default;
- var rgba = new Rgba32(0, 0, 0, 255);
-
Span rowSpan = row.GetSpan();
for (int y = 0; y < height; y++)
@@ -363,7 +358,6 @@ namespace SixLabors.ImageSharp.Formats.Bmp
int offset = 0;
Span pixelRow = pixels.GetRowSpan(newY);
- // TODO: Could use PixelOperations here!
for (int x = 0; x < arrayWidth; x++)
{
int colOffset = x * ppb;
@@ -371,9 +365,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp
{
int colorIndex = ((rowSpan[offset] >> (8 - bits - (shift * bits))) & mask) * 4;
- // Stored in b-> g-> r order.
- rgba.Bgr = Unsafe.As(ref colors[colorIndex]);
- color.PackFromRgba32(rgba);
+ color.PackFromBgr24(Unsafe.As(ref colors[colorIndex]));
pixelRow[newX] = color;
}
@@ -397,7 +389,6 @@ namespace SixLabors.ImageSharp.Formats.Bmp
int padding = CalculatePadding(width, 2);
int stride = (width * 2) + padding;
TPixel color = default;
- var rgba = new Rgba32(0, 0, 0, 255);
using (IManagedByteBuffer buffer = this.memoryAllocator.AllocateManagedByteBuffer(stride))
{
@@ -412,11 +403,12 @@ namespace SixLabors.ImageSharp.Formats.Bmp
{
short temp = BitConverter.ToInt16(buffer.Array, offset);
- rgba.R = GetBytesFrom5BitValue((temp & Rgb16RMask) >> 10);
- rgba.G = GetBytesFrom5BitValue((temp & Rgb16GMask) >> 5);
- rgba.B = GetBytesFrom5BitValue(temp & Rgb16BMask);
+ var rgb = new Rgb24(
+ GetBytesFrom5BitValue((temp & Rgb16RMask) >> 10),
+ GetBytesFrom5BitValue((temp & Rgb16GMask) >> 5),
+ GetBytesFrom5BitValue(temp & Rgb16BMask));
- color.PackFromRgba32(rgba);
+ color.PackFromRgb24(rgb);
pixelRow[x] = color;
offset += 2;
}
@@ -537,7 +529,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp
this.metaData = meta;
short bitsPerPixel = this.infoHeader.BitsPerPixel;
- var bmpMetaData = this.metaData.GetFormatMetaData(BmpFormat.Instance);
+ BmpMetaData bmpMetaData = this.metaData.GetFormatMetaData(BmpFormat.Instance);
// We can only encode at these bit rates so far.
if (bitsPerPixel.Equals((short)BmpBitsPerPixel.Pixel24)
diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
index 44e42528cf..186ff812f7 100644
--- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
+++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
@@ -101,9 +101,9 @@ namespace SixLabors.ImageSharp.Formats.Bmp
var fileHeader = new BmpFileHeader(
type: 19778, // BM
- offset: 54,
+ fileSize: 54 + infoHeader.ImageSize,
reserved: 0,
- fileSize: 54 + infoHeader.ImageSize);
+ offset: 54);
#if NETCOREAPP2_1
Span buffer = stackalloc byte[40];
diff --git a/src/ImageSharp/Formats/Gif/GifDecoderCore.cs b/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
index 207f126f9e..155e6484f7 100644
--- a/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
+++ b/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
@@ -481,22 +481,36 @@ namespace SixLabors.ImageSharp.Formats.Gif
}
ref TPixel rowRef = ref MemoryMarshal.GetReference(imageFrame.GetPixelRowSpan(writeY));
- var rgba = new Rgba32(0, 0, 0, 255);
+ bool transFlag = this.graphicsControlExtension.TransparencyFlag;
- // #403 The left + width value can be larger than the image width
- for (int x = descriptor.Left; x < descriptor.Left + descriptor.Width && x < imageWidth; x++)
+ if (!transFlag)
{
- int index = Unsafe.Add(ref indicesRef, i);
-
- if (!this.graphicsControlExtension.TransparencyFlag
- || this.graphicsControlExtension.TransparencyIndex != index)
+ // #403 The left + width value can be larger than the image width
+ for (int x = descriptor.Left; x < descriptor.Left + descriptor.Width && x < imageWidth; x++)
{
+ int index = Unsafe.Add(ref indicesRef, i);
ref TPixel pixel = ref Unsafe.Add(ref rowRef, x);
- rgba.Rgb = colorTable[index];
- pixel.PackFromRgba32(rgba);
+ Rgb24 rgb = colorTable[index];
+ pixel.PackFromRgb24(rgb);
+
+ i++;
}
+ }
+ else
+ {
+ byte transIndex = this.graphicsControlExtension.TransparencyIndex;
+ for (int x = descriptor.Left; x < descriptor.Left + descriptor.Width && x < imageWidth; x++)
+ {
+ int index = Unsafe.Add(ref indicesRef, i);
+ if (transIndex != index)
+ {
+ ref TPixel pixel = ref Unsafe.Add(ref rowRef, x);
+ Rgb24 rgb = colorTable[index];
+ pixel.PackFromRgb24(rgb);
+ }
- i++;
+ i++;
+ }
}
}
diff --git a/src/ImageSharp/Formats/Gif/GifEncoderCore.cs b/src/ImageSharp/Formats/Gif/GifEncoderCore.cs
index a8cd169e5d..7851b82ee3 100644
--- a/src/ImageSharp/Formats/Gif/GifEncoderCore.cs
+++ b/src/ImageSharp/Formats/Gif/GifEncoderCore.cs
@@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0.
using System;
+using System.Buffers;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
@@ -209,16 +210,20 @@ namespace SixLabors.ImageSharp.Formats.Gif
{
// Transparent pixels are much more likely to be found at the end of a palette
int index = -1;
- Rgba32 trans = default;
+ int length = quantized.Palette.Length;
- ref TPixel paletteRef = ref MemoryMarshal.GetReference(quantized.Palette.AsSpan());
- for (int i = quantized.Palette.Length - 1; i >= 0; i--)
+ using (IMemoryOwner rgbaBuffer = this.memoryAllocator.Allocate(length))
{
- ref TPixel entry = ref Unsafe.Add(ref paletteRef, i);
- entry.ToRgba32(ref trans);
- if (trans.Equals(default))
+ Span rgbaSpan = rgbaBuffer.GetSpan();
+ ref Rgba32 paletteRef = ref MemoryMarshal.GetReference(rgbaSpan);
+ PixelOperations.Instance.ToRgba32(quantized.Palette, rgbaSpan, length);
+
+ for (int i = quantized.Palette.Length - 1; i >= 0; i--)
{
- index = i;
+ if (Unsafe.Add(ref paletteRef, i).Equals(default))
+ {
+ index = i;
+ }
}
}
@@ -405,24 +410,13 @@ namespace SixLabors.ImageSharp.Formats.Gif
private void WriteColorTable(QuantizedFrame image, Stream stream)
where TPixel : struct, IPixel
{
- int pixelCount = image.Palette.Length;
-
// The maximium number of colors for the bit depth
int colorTableLength = ImageMaths.GetColorCountForBitDepth(this.bitDepth) * 3;
- Rgb24 rgb = default;
+ int pixelCount = image.Palette.Length;
using (IManagedByteBuffer colorTable = this.memoryAllocator.AllocateManagedByteBuffer(colorTableLength))
{
- ref TPixel paletteRef = ref MemoryMarshal.GetReference(image.Palette.AsSpan());
- ref Rgb24 rgb24Ref = ref Unsafe.As(ref MemoryMarshal.GetReference(colorTable.GetSpan()));
- for (int i = 0; i < pixelCount; i++)
- {
- ref TPixel entry = ref Unsafe.Add(ref paletteRef, i);
- entry.ToRgb24(ref rgb);
- Unsafe.Add(ref rgb24Ref, i) = rgb;
- }
-
- // Write the palette to the stream
+ PixelOperations.Instance.ToRgb24Bytes(image.Palette.AsSpan(), colorTable.GetSpan(), pixelCount);
stream.Write(colorTable.Array, 0, colorTableLength);
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs
index 4b2626c582..1dc72aaf5b 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs
@@ -6,7 +6,7 @@ using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
-using SixLabors.ImageSharp.Common.Tuples;
+using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
@@ -109,7 +109,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
- destination.Collect(ref r, ref g, ref b);
+ destination.Pack(ref r, ref g, ref b);
}
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
index ab4947e65c..46644258b1 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
@@ -6,7 +6,7 @@ using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
-using SixLabors.ImageSharp.Common.Tuples;
+using SixLabors.ImageSharp.Tuples;
// ReSharper disable ImpureMethodCallOnReadonlyValueField
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
@@ -102,7 +102,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
- destination.Collect(ref rr, ref gg, ref bb);
+ destination.Pack(ref rr, ref gg, ref bb);
}
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
index 60abb7fb2c..456636dc39 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
@@ -6,8 +6,8 @@ using System.Collections.Generic;
using System.Linq;
using System.Numerics;
-using SixLabors.ImageSharp.Common.Tuples;
using SixLabors.ImageSharp.Memory;
+using SixLabors.ImageSharp.Tuples;
using SixLabors.Memory;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
@@ -157,9 +157,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
public Vector4 V0, V1, V2, V3, V4, V5, V6, V7;
///
- /// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order.
+ /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ...
///
- public void Collect(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b)
+ public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b)
{
this.V0.X = r.A.X;
this.V0.Y = g.A.X;
diff --git a/src/ImageSharp/Formats/Png/PngEncoderCore.cs b/src/ImageSharp/Formats/Png/PngEncoderCore.cs
index a46d83707e..1e9dbc71a1 100644
--- a/src/ImageSharp/Formats/Png/PngEncoderCore.cs
+++ b/src/ImageSharp/Formats/Png/PngEncoderCore.cs
@@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0.
using System;
+using System.Buffers;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.IO;
@@ -312,11 +313,6 @@ namespace SixLabors.ImageSharp.Formats.Png
private void CollectGrayscaleBytes(ReadOnlySpan rowSpan)
where TPixel : struct, IPixel
{
- // Use ITU-R recommendation 709 to match libpng.
- const float RX = .2126F;
- const float GX = .7152F;
- const float BX = .0722F;
-
ref TPixel rowSpanRef = ref MemoryMarshal.GetReference(rowSpan);
Span rawScanlineSpan = this.rawScanline.GetSpan();
ref byte rawScanlineSpanRef = ref MemoryMarshal.GetReference(rawScanlineSpan);
@@ -327,12 +323,18 @@ namespace SixLabors.ImageSharp.Formats.Png
if (this.use16Bit)
{
// 16 bit grayscale
- Rgb48 rgb = default;
- for (int x = 0, o = 0; x < rowSpan.Length; x++, o += 2)
+ using (IMemoryOwner luminanceBuffer = this.memoryAllocator.Allocate(rowSpan.Length))
{
- Unsafe.Add(ref rowSpanRef, x).ToRgb48(ref rgb);
- ushort luminance = (ushort)((RX * rgb.R) + (GX * rgb.G) + (BX * rgb.B));
- BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o, 2), luminance);
+ Span luminanceSpan = luminanceBuffer.GetSpan();
+ ref Gray16 luminanceRef = ref MemoryMarshal.GetReference(luminanceSpan);
+ PixelOperations.Instance.ToGray16(rowSpan, luminanceSpan, rowSpan.Length);
+
+ // Can't map directly to byte array as it's big endian.
+ for (int x = 0, o = 0; x < luminanceSpan.Length; x++, o += 2)
+ {
+ Gray16 luminance = Unsafe.Add(ref luminanceRef, x);
+ BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o, 2), luminance.PackedValue);
+ }
}
}
else
@@ -340,12 +342,7 @@ namespace SixLabors.ImageSharp.Formats.Png
if (this.bitDepth == 8)
{
// 8 bit grayscale
- Rgb24 rgb = default;
- for (int x = 0; x < rowSpan.Length; x++)
- {
- Unsafe.Add(ref rowSpanRef, x).ToRgb24(ref rgb);
- Unsafe.Add(ref rawScanlineSpanRef, x) = (byte)((RX * rgb.R) + (GX * rgb.G) + (BX * rgb.B));
- }
+ PixelOperations.Instance.ToGray8Bytes(rowSpan, rawScanlineSpan, rowSpan.Length);
}
else
{
@@ -356,14 +353,9 @@ namespace SixLabors.ImageSharp.Formats.Png
Span tempSpan = temp.GetSpan();
ref byte tempSpanRef = ref MemoryMarshal.GetReference(tempSpan);
- Rgb24 rgb = default;
- for (int x = 0; x < rowSpan.Length; x++)
- {
- Unsafe.Add(ref rowSpanRef, x).ToRgb24(ref rgb);
- float luminance = ((RX * rgb.R) + (GX * rgb.G) + (BX * rgb.B)) / scaleFactor;
- Unsafe.Add(ref tempSpanRef, x) = (byte)luminance;
- this.ScaleDownFrom8BitArray(tempSpan, rawScanlineSpan, this.bitDepth);
- }
+ // We need to first create an array of luminance bytes then scale them down to the correct bit depth.
+ PixelOperations.Instance.ToGray8Bytes(rowSpan, tempSpan, rowSpan.Length);
+ this.ScaleDownFrom8BitArray(tempSpan, rawScanlineSpan, this.bitDepth, scaleFactor);
}
}
}
@@ -373,23 +365,31 @@ namespace SixLabors.ImageSharp.Formats.Png
if (this.use16Bit)
{
// 16 bit grayscale + alpha
- Rgba64 rgba = default;
- for (int x = 0, o = 0; x < rowSpan.Length; x++, o += 4)
+ // TODO: Should we consider a GrayAlpha32 type in the future?
+ using (IMemoryOwner rgbaBuffer = this.memoryAllocator.Allocate(rowSpan.Length))
{
- Unsafe.Add(ref rowSpanRef, x).ToRgba64(ref rgba);
- ushort luminance = (ushort)((RX * rgba.R) + (GX * rgba.G) + (BX * rgba.B));
- BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o, 2), luminance);
- BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 2, 2), rgba.A);
+ Span rgbaSpan = rgbaBuffer.GetSpan();
+ ref Rgba64 rgbaRef = ref MemoryMarshal.GetReference(rgbaSpan);
+ PixelOperations.Instance.ToRgba64(rowSpan, rgbaSpan, rowSpan.Length);
+
+ // Can't map directly to byte array as it's big endian.
+ for (int x = 0, o = 0; x < rgbaSpan.Length; x++, o += 4)
+ {
+ Rgba64 rgba = Unsafe.Add(ref rgbaRef, x);
+ ushort luminance = ImageMaths.Get16BitBT709Luminance(rgba.R, rgba.G, rgba.B);
+ BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o, 2), luminance);
+ BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 2, 2), rgba.A);
+ }
}
}
else
{
// 8 bit grayscale + alpha
- Rgba32 rgba = default;
+ // TODO: Should we consider a GrayAlpha16 type in the future?
for (int x = 0, o = 0; x < rowSpan.Length; x++, o += 2)
{
- Unsafe.Add(ref rowSpanRef, x).ToRgba32(ref rgba);
- Unsafe.Add(ref rawScanlineSpanRef, o) = (byte)((RX * rgba.R) + (GX * rgba.G) + (BX * rgba.B));
+ var rgba = Unsafe.Add(ref rowSpanRef, x).ToRgba32();
+ Unsafe.Add(ref rawScanlineSpanRef, o) = ImageMaths.Get8BitBT709Luminance(rgba.R, rgba.G, rgba.B);
Unsafe.Add(ref rawScanlineSpanRef, o + 1) = rgba.A;
}
}
@@ -425,15 +425,21 @@ namespace SixLabors.ImageSharp.Formats.Png
case 8:
{
// 16 bit Rgba
- Rgba64 rgba = default;
- ref TPixel rowSpanRef = ref MemoryMarshal.GetReference(rowSpan);
- for (int x = 0, o = 0; x < rowSpan.Length; x++, o += 8)
+ using (IMemoryOwner rgbaBuffer = this.memoryAllocator.Allocate(rowSpan.Length))
{
- Unsafe.Add(ref rowSpanRef, x).ToRgba64(ref rgba);
- BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o, 2), rgba.R);
- BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 2, 2), rgba.G);
- BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 4, 2), rgba.B);
- BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 6, 2), rgba.A);
+ Span rgbaSpan = rgbaBuffer.GetSpan();
+ ref Rgba64 rgbaRef = ref MemoryMarshal.GetReference(rgbaSpan);
+ PixelOperations.Instance.ToRgba64(rowSpan, rgbaSpan, rowSpan.Length);
+
+ // Can't map directly to byte array as it's big endian.
+ for (int x = 0, o = 0; x < rowSpan.Length; x++, o += 8)
+ {
+ Rgba64 rgba = Unsafe.Add(ref rgbaRef, x);
+ BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o, 2), rgba.R);
+ BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 2, 2), rgba.G);
+ BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 4, 2), rgba.B);
+ BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 6, 2), rgba.A);
+ }
}
break;
@@ -442,14 +448,20 @@ namespace SixLabors.ImageSharp.Formats.Png
default:
{
// 16 bit Rgb
- Rgb48 rgb = default;
- ref TPixel rowSpanRef = ref MemoryMarshal.GetReference(rowSpan);
- for (int x = 0, o = 0; x < rowSpan.Length; x++, o += 6)
+ using (IMemoryOwner rgbBuffer = this.memoryAllocator.Allocate(rowSpan.Length))
{
- Unsafe.Add(ref rowSpanRef, x).ToRgb48(ref rgb);
- BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o, 2), rgb.R);
- BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 2, 2), rgb.G);
- BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 4, 2), rgb.B);
+ Span rgbSpan = rgbBuffer.GetSpan();
+ ref Rgb48 rgbRef = ref MemoryMarshal.GetReference(rgbSpan);
+ PixelOperations.Instance.ToRgb48(rowSpan, rgbSpan, rowSpan.Length);
+
+ // Can't map directly to byte array as it's big endian.
+ for (int x = 0, o = 0; x < rowSpan.Length; x++, o += 6)
+ {
+ Rgb48 rgb = Unsafe.Add(ref rgbRef, x);
+ BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o, 2), rgb.R);
+ BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 2, 2), rgb.G);
+ BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 4, 2), rgb.B);
+ }
}
break;
@@ -624,7 +636,6 @@ namespace SixLabors.ImageSharp.Formats.Png
TPixel[] palette = quantized.Palette;
int paletteLength = Math.Min(palette.Length, 256);
int colorTableLength = paletteLength * 3;
- Rgba32 rgba = default;
bool anyAlpha = false;
using (IManagedByteBuffer colorTable = this.memoryAllocator.AllocateManagedByteBuffer(colorTableLength))
@@ -639,7 +650,7 @@ namespace SixLabors.ImageSharp.Formats.Png
if (quantizedSpan.IndexOf((byte)i) > -1)
{
int offset = i * 3;
- palette[i].ToRgba32(ref rgba);
+ var rgba = palette[i].ToRgba32();
byte alpha = rgba.A;
@@ -851,7 +862,8 @@ namespace SixLabors.ImageSharp.Formats.Png
/// The source span in 8 bits.
/// The resultant span in .
/// The bit depth.
- private void ScaleDownFrom8BitArray(ReadOnlySpan source, Span result, int bits)
+ /// The scaling factor.
+ private void ScaleDownFrom8BitArray(ReadOnlySpan source, Span result, int bits, float scale = 1)
{
ref byte sourceRef = ref MemoryMarshal.GetReference(source);
ref byte resultRef = ref MemoryMarshal.GetReference(result);
@@ -864,7 +876,7 @@ namespace SixLabors.ImageSharp.Formats.Png
for (int i = 0; i < source.Length; i++)
{
- int value = Unsafe.Add(ref sourceRef, i) & mask;
+ int value = ((int)MathF.Round(Unsafe.Add(ref sourceRef, i) / scale)) & mask;
v |= value << shift;
if (shift == 0)
diff --git a/src/ImageSharp/Formats/Png/PngScanlineProcessor.cs b/src/ImageSharp/Formats/Png/PngScanlineProcessor.cs
index 5ea25e72d4..70f2cb04b6 100644
--- a/src/ImageSharp/Formats/Png/PngScanlineProcessor.cs
+++ b/src/ImageSharp/Formats/Png/PngScanlineProcessor.cs
@@ -32,30 +32,19 @@ namespace SixLabors.ImageSharp.Formats.Png
{
if (header.BitDepth == 16)
{
- Rgb48 rgb48 = default;
for (int x = 0, o = 0; x < header.Width; x++, o += 2)
{
ushort luminance = BinaryPrimitives.ReadUInt16BigEndian(scanlineSpan.Slice(o, 2));
- rgb48.R = luminance;
- rgb48.G = luminance;
- rgb48.B = luminance;
-
- pixel.PackFromRgb48(rgb48);
+ pixel.PackFromGray16(new Gray16(luminance));
Unsafe.Add(ref rowSpanRef, x) = pixel;
}
}
else
{
- // TODO: We should really be using Rgb24 here but IPixel does not have a PackFromRgb24 method.
- var rgba32 = new Rgba32(0, 0, 0, byte.MaxValue);
for (int x = 0; x < header.Width; x++)
{
byte luminance = (byte)(Unsafe.Add(ref scanlineSpanRef, x) * scaleFactor);
- rgba32.R = luminance;
- rgba32.G = luminance;
- rgba32.B = luminance;
-
- pixel.PackFromRgba32(rgba32);
+ pixel.PackFromGray8(new Gray8(luminance));
Unsafe.Add(ref rowSpanRef, x) = pixel;
}
}
@@ -116,30 +105,19 @@ namespace SixLabors.ImageSharp.Formats.Png
{
if (header.BitDepth == 16)
{
- Rgb48 rgb48 = default;
for (int x = pixelOffset, o = 0; x < header.Width; x += increment, o += 2)
{
ushort luminance = BinaryPrimitives.ReadUInt16BigEndian(scanlineSpan.Slice(o, 2));
- rgb48.R = luminance;
- rgb48.G = luminance;
- rgb48.B = luminance;
-
- pixel.PackFromRgb48(rgb48);
+ pixel.PackFromGray16(new Gray16(luminance));
Unsafe.Add(ref rowSpanRef, x) = pixel;
}
}
else
{
- // TODO: We should really be using Rgb24 here but IPixel does not have a PackFromRgb24 method.
- var rgba32 = new Rgba32(0, 0, 0, byte.MaxValue);
for (int x = pixelOffset, o = 0; x < header.Width; x += increment, o++)
{
byte luminance = (byte)(Unsafe.Add(ref scanlineSpanRef, o) * scaleFactor);
- rgba32.R = luminance;
- rgba32.G = luminance;
- rgba32.B = luminance;
-
- pixel.PackFromRgba32(rgba32);
+ pixel.PackFromGray8(new Gray8(luminance));
Unsafe.Add(ref rowSpanRef, x) = pixel;
}
}
@@ -311,14 +289,12 @@ namespace SixLabors.ImageSharp.Formats.Png
}
else
{
- // TODO: We should have PackFromRgb24.
- var rgba = new Rgba32(0, 0, 0, byte.MaxValue);
for (int x = 0; x < header.Width; x++)
{
int index = Unsafe.Add(ref scanlineSpanRef, x);
- rgba.Rgb = Unsafe.Add(ref palettePixelsRef, index);
+ Rgb24 rgb = Unsafe.Add(ref palettePixelsRef, index);
- pixel.PackFromRgba32(rgba);
+ pixel.PackFromRgb24(rgb);
Unsafe.Add(ref rowSpanRef, x) = pixel;
}
}
@@ -358,13 +334,12 @@ namespace SixLabors.ImageSharp.Formats.Png
}
else
{
- var rgba = new Rgba32(0, 0, 0, byte.MaxValue);
for (int x = pixelOffset, o = 0; x < header.Width; x += increment, o++)
{
int index = Unsafe.Add(ref scanlineSpanRef, o);
- rgba.Rgb = Unsafe.Add(ref palettePixelsRef, index);
+ Rgb24 rgb = Unsafe.Add(ref palettePixelsRef, index);
- pixel.PackFromRgba32(rgba);
+ pixel.PackFromRgb24(rgb);
Unsafe.Add(ref rowSpanRef, x) = pixel;
}
}
@@ -511,14 +486,14 @@ namespace SixLabors.ImageSharp.Formats.Png
}
else
{
- var rgba = new Rgba32(0, 0, 0, byte.MaxValue);
+ Rgb24 rgb = default;
for (int x = pixelOffset, o = 0; x < header.Width; x += increment, o += bytesPerPixel)
{
- rgba.R = Unsafe.Add(ref scanlineSpanRef, o);
- rgba.G = Unsafe.Add(ref scanlineSpanRef, o + bytesPerSample);
- rgba.B = Unsafe.Add(ref scanlineSpanRef, o + (2 * bytesPerSample));
+ rgb.R = Unsafe.Add(ref scanlineSpanRef, o);
+ rgb.G = Unsafe.Add(ref scanlineSpanRef, o + bytesPerSample);
+ rgb.B = Unsafe.Add(ref scanlineSpanRef, o + (2 * bytesPerSample));
- pixel.PackFromRgba32(rgba);
+ pixel.PackFromRgb24(rgb);
Unsafe.Add(ref rowSpanRef, x) = pixel;
}
}
diff --git a/src/ImageSharp/ImageFrame{TPixel}.cs b/src/ImageSharp/ImageFrame{TPixel}.cs
index be1792ced1..ecf9e13ceb 100644
--- a/src/ImageSharp/ImageFrame{TPixel}.cs
+++ b/src/ImageSharp/ImageFrame{TPixel}.cs
@@ -2,8 +2,6 @@
// Licensed under the Apache License, Version 2.0.
using System;
-using System.Buffers;
-using System.Numerics;
using System.Runtime.CompilerServices;
using System.Threading.Tasks;
using SixLabors.ImageSharp.Advanced;
@@ -289,22 +287,16 @@ namespace SixLabors.ImageSharp
var target = new ImageFrame(configuration, this.Width, this.Height, this.MetaData.DeepClone());
- ParallelHelper.IterateRowsWithTempBuffer(
+ ParallelHelper.IterateRows(
this.Bounds(),
configuration,
- (rows, tempRowBuffer) =>
+ (rows) =>
{
for (int y = rows.Min; y < rows.Max; y++)
{
Span sourceRow = this.GetPixelRowSpan(y);
Span targetRow = target.GetPixelRowSpan(y);
- Span tempRowSpan = tempRowBuffer.Span;
-
- PixelOperations.Instance.ToScaledVector4(sourceRow, tempRowSpan, sourceRow.Length);
- PixelOperations.Instance.PackFromScaledVector4(
- tempRowSpan,
- targetRow,
- targetRow.Length);
+ PixelOperations.Instance.To(sourceRow, targetRow, sourceRow.Length);
}
});
diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj
index 83b2b12604..17e417dca8 100644
--- a/src/ImageSharp/ImageSharp.csproj
+++ b/src/ImageSharp/ImageSharp.csproj
@@ -76,10 +76,42 @@
TextTemplatingFileGenerator
PixelOperations{TPixel}.Generated.cs
+
+ TextTemplatingFileGenerator
+ Argb32.PixelOperations.Generated.cs
+
+
+ TextTemplatingFileGenerator
+ Bgr24.PixelOperations.Generated.cs
+
+
+ TextTemplatingFileGenerator
+ Bgra32.PixelOperations.Generated.cs
+
+
+ TextTemplatingFileGenerator
+ Gray8.PixelOperations.Generated.cs
+
+
+ TextTemplatingFileGenerator
+ Gray16.PixelOperations.Generated.cs
+
+
+ TextTemplatingFileGenerator
+ Rgb24.PixelOperations.Generated.cs
+
TextTemplatingFileGenerator
Rgba32.PixelOperations.Generated.cs
+
+ TextTemplatingFileGenerator
+ Rgb48.PixelOperations.Generated.cs
+
+
+ TextTemplatingFileGenerator
+ Rgba64.PixelOperations.Generated.cs
+
PorterDuffFunctions.Generated.cs
TextTemplatingFileGenerator
@@ -110,11 +142,51 @@
True
PixelOperations{TPixel}.Generated.tt
+
+ True
+ True
+ Argb32.PixelOperations.Generated.tt
+
+
+ True
+ True
+ Bgr24.PixelOperations.Generated.tt
+
+
+ True
+ True
+ Bgra32.PixelOperations.Generated.tt
+
+
+ True
+ True
+ Gray8.PixelOperations.Generated.tt
+
+
+ True
+ True
+ Gray16.PixelOperations.Generated.tt
+
+
+ True
+ True
+ Rgb24.PixelOperations.Generated.tt
+
True
True
Rgba32.PixelOperations.Generated.tt
+
+ True
+ True
+ Rgb48.PixelOperations.Generated.tt
+
+
+ True
+ True
+ Rgba64.PixelOperations.Generated.tt
+
True
True
diff --git a/src/ImageSharp/PixelFormats/Alpha8.cs b/src/ImageSharp/PixelFormats/Alpha8.cs
index a8d97d31a2..1e724768d0 100644
--- a/src/ImageSharp/PixelFormats/Alpha8.cs
+++ b/src/ImageSharp/PixelFormats/Alpha8.cs
@@ -18,11 +18,14 @@ namespace SixLabors.ImageSharp.PixelFormats
///
/// Initializes a new instance of the struct.
///
- /// The alpha component
- public Alpha8(float alpha)
- {
- this.PackedValue = Pack(alpha);
- }
+ /// The alpha component.
+ public Alpha8(byte alpha) => this.PackedValue = alpha;
+
+ ///
+ /// Initializes a new instance of the struct.
+ ///
+ /// The alpha component.
+ public Alpha8(float alpha) => this.PackedValue = Pack(alpha);
///
public byte PackedValue { get; set; }
@@ -39,11 +42,8 @@ namespace SixLabors.ImageSharp.PixelFormats
///
/// True if the parameter is equal to the parameter; otherwise, false.
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static bool operator ==(Alpha8 left, Alpha8 right)
- {
- return left.PackedValue == right.PackedValue;
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static bool operator ==(Alpha8 left, Alpha8 right) => left.Equals(right);
///
/// Compares two objects for equality.
@@ -53,161 +53,91 @@ namespace SixLabors.ImageSharp.PixelFormats
///
/// True if the parameter is not equal to the parameter; otherwise, false.
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static bool operator !=(Alpha8 left, Alpha8 right)
- {
- return left.PackedValue != right.PackedValue;
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static bool operator !=(Alpha8 left, Alpha8 right) => !left.Equals(right);
///
public PixelOperations CreatePixelOperations() => new PixelOperations();
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void PackFromScaledVector4(Vector4 vector)
- {
- this.PackFromVector4(vector);
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void PackFromScaledVector4(Vector4 vector) => this.PackFromVector4(vector);
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public Vector4 ToScaledVector4()
- {
- return this.ToVector4();
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public Vector4 ToScaledVector4() => this.ToVector4();
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void PackFromVector4(Vector4 vector)
- {
- this.PackedValue = Pack(vector.W);
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void PackFromVector4(Vector4 vector) => this.PackedValue = Pack(vector.W);
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public Vector4 ToVector4()
- {
- return new Vector4(0, 0, 0, this.PackedValue / 255F);
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public Vector4 ToVector4() => new Vector4(0, 0, 0, this.PackedValue / 255F);
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void PackFromRgba32(Rgba32 source)
- {
- this.PackedValue = source.A;
- }
+ ///
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void PackFromArgb32(Argb32 source) => this.PackedValue = source.A;
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void PackFromArgb32(Argb32 source)
- {
- this.PackedValue = source.A;
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void PackFromBgr24(Bgr24 source) => this.PackedValue = byte.MaxValue;
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void PackFromBgra32(Bgra32 source)
- {
- this.PackedValue = source.A;
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void PackFromBgra32(Bgra32 source) => this.PackedValue = source.A;
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void ToRgb24(ref Rgb24 dest)
- {
- dest = default(Rgb24);
- }
+ ///
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void PackFromGray8(Gray8 source) => this.PackedValue = byte.MaxValue;
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void ToRgba32(ref Rgba32 dest)
- {
- dest.R = 0;
- dest.G = 0;
- dest.B = 0;
- dest.A = this.PackedValue;
- }
+ ///
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void PackFromGray16(Gray16 source) => this.PackedValue = byte.MaxValue;
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void ToArgb32(ref Argb32 dest)
- {
- dest.R = 0;
- dest.G = 0;
- dest.B = 0;
- dest.A = this.PackedValue;
- }
+ ///
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void PackFromRgb24(Rgb24 source) => this.PackedValue = byte.MaxValue;
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void ToBgr24(ref Bgr24 dest)
- {
- dest = default(Bgr24);
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void PackFromRgba32(Rgba32 source) => this.PackedValue = source.A;
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void ToBgra32(ref Bgra32 dest)
- {
- dest.R = 0;
- dest.G = 0;
- dest.B = 0;
- dest.A = this.PackedValue;
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public Rgba32 ToRgba32() => new Rgba32(0, 0, 0, this.PackedValue);
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public void PackFromRgb48(Rgb48 source) => this.PackedValue = byte.MaxValue;
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void ToRgb48(ref Rgb48 dest)
- {
- dest.R = 0;
- dest.G = 0;
- dest.B = 0;
- }
-
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public void PackFromRgba64(Rgba64 source) => this.PackFromScaledVector4(source.ToScaledVector4());
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void ToRgba64(ref Rgba64 dest) => dest.PackFromScaledVector4(this.ToScaledVector4());
-
///
/// Compares an object with the packed vector.
///
/// The object to compare.
/// True if the object is equal to the packed vector.
- public override bool Equals(object obj)
- {
- return obj is Alpha8 other && this.Equals(other);
- }
+ public override bool Equals(object obj) => obj is Alpha8 other && this.Equals(other);
///
/// Compares another Alpha8 packed vector with the packed vector.
///
/// The Alpha8 packed vector to compare.
/// True if the packed vectors are equal.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool Equals(Alpha8 other)
- {
- return this.PackedValue == other.PackedValue;
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public bool Equals(Alpha8 other) => this.PackedValue.Equals(other.PackedValue);
///
/// Gets a string representation of the packed vector.
///
/// A string representation of the packed vector.
- public override string ToString()
- {
- return (this.PackedValue / 255F).ToString();
- }
+ public override string ToString() => $"Alpha8({this.PackedValue})";
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public override int GetHashCode() => this.PackedValue.GetHashCode();
///
@@ -215,10 +145,7 @@ namespace SixLabors.ImageSharp.PixelFormats
///
/// The float containing the value to pack.
/// The containing the packed values.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static byte Pack(float alpha)
- {
- return (byte)Math.Round(alpha.Clamp(0, 1) * 255F);
- }
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private static byte Pack(float alpha) => (byte)Math.Round(alpha.Clamp(0, 1F) * 255F);
}
}
\ No newline at end of file
diff --git a/src/ImageSharp/PixelFormats/Argb32.cs b/src/ImageSharp/PixelFormats/Argb32.cs
index 51d3964ef8..1e3bd93262 100644
--- a/src/ImageSharp/PixelFormats/Argb32.cs
+++ b/src/ImageSharp/PixelFormats/Argb32.cs
@@ -19,7 +19,7 @@ namespace SixLabors.ImageSharp.PixelFormats
/// as it avoids the need to create new values for modification operations.
///
[StructLayout(LayoutKind.Sequential)]
- public struct Argb32 : IPixel, IPackedVector
+ public partial struct Argb32 : IPixel, IPackedVector
{
///
/// Gets or sets the alpha component.
@@ -57,7 +57,7 @@ namespace SixLabors.ImageSharp.PixelFormats
/// The red component.
/// The green component.
/// The blue component.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public Argb32(byte r, byte g, byte b)
{
this.R = r;
@@ -73,7 +73,7 @@ namespace SixLabors.ImageSharp.PixelFormats
/// The green component.
/// The blue component.
/// The alpha component.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public Argb32(byte r, byte g, byte b, byte a)
{
this.R = r;
@@ -89,12 +89,9 @@ namespace SixLabors.ImageSharp.PixelFormats
/// The green component.
/// The blue component.
/// The alpha component.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(InliningOptions.ShortMethod)]
public Argb32(float r, float g, float b, float a = 1)
- : this()
- {
- this.Pack(r, g, b, a);
- }
+ : this() => this.Pack(r, g, b, a);
///
/// Initializes a new instance of the