diff --git a/.editorconfig b/.editorconfig
index 03036f8a5..33fd0577a 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -75,7 +75,7 @@ indent_style = tab
[*.{cs,csx,cake,vb,vbx}]
# Default Severity for all .NET Code Style rules below
-dotnet_analyzer_diagnostic.severity = warning
+dotnet_analyzer_diagnostic.category-style.severity = warning
##########################################
# Language Rules
diff --git a/Directory.Build.props b/Directory.Build.props
index 3df93fcd4..b3e18e5a5 100644
--- a/Directory.Build.props
+++ b/Directory.Build.props
@@ -18,13 +18,12 @@
-
-
- false
-
-
+
true
-
+
diff --git a/ImageSharp.sln b/ImageSharp.sln
index a8a69d128..aac624bde 100644
--- a/ImageSharp.sln
+++ b/ImageSharp.sln
@@ -621,115 +621,43 @@ Global
EndGlobalSection
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
- Debug|x64 = Debug|x64
- Debug|x86 = Debug|x86
Debug-InnerLoop|Any CPU = Debug-InnerLoop|Any CPU
- Debug-InnerLoop|x64 = Debug-InnerLoop|x64
- Debug-InnerLoop|x86 = Debug-InnerLoop|x86
Release|Any CPU = Release|Any CPU
- Release|x64 = Release|x64
- Release|x86 = Release|x86
Release-InnerLoop|Any CPU = Release-InnerLoop|Any CPU
- Release-InnerLoop|x64 = Release-InnerLoop|x64
- Release-InnerLoop|x86 = Release-InnerLoop|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug|x64.ActiveCfg = Debug|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug|x64.Build.0 = Debug|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug|x86.ActiveCfg = Debug|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug|x86.Build.0 = Debug|Any CPU
{2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug-InnerLoop|Any CPU.ActiveCfg = Debug-InnerLoop|Any CPU
{2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug-InnerLoop|Any CPU.Build.0 = Debug-InnerLoop|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug-InnerLoop|x64.ActiveCfg = Debug-InnerLoop|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug-InnerLoop|x64.Build.0 = Debug-InnerLoop|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug-InnerLoop|x86.ActiveCfg = Debug-InnerLoop|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Debug-InnerLoop|x86.Build.0 = Debug-InnerLoop|Any CPU
{2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release|Any CPU.Build.0 = Release|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release|x64.ActiveCfg = Release|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release|x64.Build.0 = Release|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release|x86.ActiveCfg = Release|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release|x86.Build.0 = Release|Any CPU
{2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release-InnerLoop|Any CPU.ActiveCfg = Release-InnerLoop|Any CPU
{2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release-InnerLoop|Any CPU.Build.0 = Release-InnerLoop|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release-InnerLoop|x64.ActiveCfg = Release-InnerLoop|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release-InnerLoop|x64.Build.0 = Release-InnerLoop|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release-InnerLoop|x86.ActiveCfg = Release-InnerLoop|Any CPU
- {2AA31A1F-142C-43F4-8687-09ABCA4B3A26}.Release-InnerLoop|x86.Build.0 = Release-InnerLoop|Any CPU
{EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug|x64.ActiveCfg = Debug|Any CPU
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug|x64.Build.0 = Debug|Any CPU
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug|x86.ActiveCfg = Debug|Any CPU
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug|x86.Build.0 = Debug|Any CPU
{EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug-InnerLoop|Any CPU.ActiveCfg = Debug-InnerLoop|Any CPU
{EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug-InnerLoop|Any CPU.Build.0 = Debug-InnerLoop|Any CPU
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug-InnerLoop|x64.ActiveCfg = Debug-InnerLoop|x64
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug-InnerLoop|x64.Build.0 = Debug-InnerLoop|x64
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug-InnerLoop|x86.ActiveCfg = Debug-InnerLoop|x86
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Debug-InnerLoop|x86.Build.0 = Debug-InnerLoop|x86
{EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release|Any CPU.ActiveCfg = Release|Any CPU
{EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release|Any CPU.Build.0 = Release|Any CPU
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release|x64.ActiveCfg = Release|Any CPU
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release|x64.Build.0 = Release|Any CPU
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release|x86.ActiveCfg = Release|Any CPU
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release|x86.Build.0 = Release|Any CPU
{EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release-InnerLoop|Any CPU.ActiveCfg = Release-InnerLoop|Any CPU
{EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release-InnerLoop|Any CPU.Build.0 = Release-InnerLoop|Any CPU
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release-InnerLoop|x64.ActiveCfg = Release-InnerLoop|x64
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release-InnerLoop|x64.Build.0 = Release-InnerLoop|x64
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release-InnerLoop|x86.ActiveCfg = Release-InnerLoop|x86
- {EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}.Release-InnerLoop|x86.Build.0 = Release-InnerLoop|x86
{2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug|x64.ActiveCfg = Debug|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug|x64.Build.0 = Debug|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug|x86.ActiveCfg = Debug|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug|x86.Build.0 = Debug|Any CPU
{2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug-InnerLoop|Any CPU.ActiveCfg = Debug-InnerLoop|Any CPU
{2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug-InnerLoop|Any CPU.Build.0 = Debug-InnerLoop|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug-InnerLoop|x64.ActiveCfg = Debug-InnerLoop|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug-InnerLoop|x64.Build.0 = Debug-InnerLoop|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug-InnerLoop|x86.ActiveCfg = Debug-InnerLoop|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Debug-InnerLoop|x86.Build.0 = Debug-InnerLoop|Any CPU
{2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release|Any CPU.Build.0 = Release|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release|x64.ActiveCfg = Release|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release|x64.Build.0 = Release|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release|x86.ActiveCfg = Release|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release|x86.Build.0 = Release|Any CPU
{2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release-InnerLoop|Any CPU.ActiveCfg = Release-InnerLoop|Any CPU
{2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release-InnerLoop|Any CPU.Build.0 = Release-InnerLoop|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release-InnerLoop|x64.ActiveCfg = Release-InnerLoop|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release-InnerLoop|x64.Build.0 = Release-InnerLoop|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release-InnerLoop|x86.ActiveCfg = Release-InnerLoop|Any CPU
- {2BF743D8-2A06-412D-96D7-F448F00C5EA5}.Release-InnerLoop|x86.Build.0 = Release-InnerLoop|Any CPU
{FC527290-2F22-432C-B77B-6E815726B02C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FC527290-2F22-432C-B77B-6E815726B02C}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Debug|x64.ActiveCfg = Debug|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Debug|x64.Build.0 = Debug|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Debug|x86.ActiveCfg = Debug|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Debug|x86.Build.0 = Debug|Any CPU
{FC527290-2F22-432C-B77B-6E815726B02C}.Debug-InnerLoop|Any CPU.ActiveCfg = Debug-InnerLoop|Any CPU
{FC527290-2F22-432C-B77B-6E815726B02C}.Debug-InnerLoop|Any CPU.Build.0 = Debug-InnerLoop|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Debug-InnerLoop|x64.ActiveCfg = Debug-InnerLoop|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Debug-InnerLoop|x64.Build.0 = Debug-InnerLoop|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Debug-InnerLoop|x86.ActiveCfg = Debug-InnerLoop|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Debug-InnerLoop|x86.Build.0 = Debug-InnerLoop|Any CPU
{FC527290-2F22-432C-B77B-6E815726B02C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FC527290-2F22-432C-B77B-6E815726B02C}.Release|Any CPU.Build.0 = Release|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Release|x64.ActiveCfg = Release|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Release|x64.Build.0 = Release|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Release|x86.ActiveCfg = Release|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Release|x86.Build.0 = Release|Any CPU
{FC527290-2F22-432C-B77B-6E815726B02C}.Release-InnerLoop|Any CPU.ActiveCfg = Release-InnerLoop|Any CPU
{FC527290-2F22-432C-B77B-6E815726B02C}.Release-InnerLoop|Any CPU.Build.0 = Release-InnerLoop|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Release-InnerLoop|x64.ActiveCfg = Release-InnerLoop|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Release-InnerLoop|x64.Build.0 = Release-InnerLoop|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Release-InnerLoop|x86.ActiveCfg = Release-InnerLoop|Any CPU
- {FC527290-2F22-432C-B77B-6E815726B02C}.Release-InnerLoop|x86.Build.0 = Release-InnerLoop|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/shared-infrastructure b/shared-infrastructure
index 48e73f455..9b94ebc4b 160000
--- a/shared-infrastructure
+++ b/shared-infrastructure
@@ -1 +1 @@
-Subproject commit 48e73f455f15eafefbe3175efc7433e5f277e506
+Subproject commit 9b94ebc4be9b7a8d7620c257e6ee485455973332
diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs
index 058199301..db65b84cc 100644
--- a/src/ImageSharp/Common/Helpers/Numerics.cs
+++ b/src/ImageSharp/Common/Helpers/Numerics.cs
@@ -23,6 +23,16 @@ namespace SixLabors.ImageSharp
private const int ShuffleAlphaControl = 0b_11_11_11_11;
#endif
+#if !SUPPORTS_BITOPERATIONS
+ private static ReadOnlySpan Log2DeBruijn => new byte[32] // De Bruijn lookup table; Log2SoftwareFallback reads it at index (smearedValue * 0x07C4ACDDu) >> 27
+ {
+ 00, 09, 01, 10, 13, 21, 02, 29,
+ 11, 14, 16, 18, 22, 25, 03, 30,
+ 08, 12, 20, 28, 15, 17, 24, 07,
+ 19, 27, 23, 06, 26, 05, 04, 31
+ };
+#endif
+
///
/// Determine the Greatest CommonDivisor (GCD) of two numbers.
///
@@ -756,7 +766,7 @@ namespace SixLabors.ImageSharp
/// widening them to 32-bit integers and performing four additions.
///
///
- /// byte(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+ /// byte(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
/// is widened and added onto as such:
///
/// accumulator += i32(1, 2, 3, 4);
@@ -825,5 +835,49 @@ namespace SixLabors.ImageSharp
return Sse2.ConvertToInt32(vsum);
}
#endif
+
+ /// <summary>
+ /// Calculates the floored base-2 logarithm of the specified value.
+ /// Note that by convention, input value 0 returns 0 since Log(0) is undefined.
+ /// </summary>
+ /// <param name="value">The value to take the logarithm of.</param>
+ public static int Log2(uint value)
+ {
+#if SUPPORTS_BITOPERATIONS
+ return BitOperations.Log2(value);
+#else
+ return Log2SoftwareFallback(value);
+#endif
+ }
+
+#if !SUPPORTS_BITOPERATIONS
+ /// <summary>
+ /// Calculates the floored base-2 logarithm of the specified value.
+ /// Note that by convention, input value 0 returns 0 since Log(0) is undefined.
+ /// Bit hacking with a de Bruijn sequence; it does not use any intrinsics, so it will work on every platform/runtime.
+ /// </summary>
+ /// <remarks>
+ /// Description of this bit hacking can be found here:
+ /// https://cstheory.stackexchange.com/questions/19524/using-the-de-bruijn-sequence-to-find-the-lceil-log-2-v-rceil-of-an-integer
+ /// </remarks>
+ /// <param name="value">The value to take the logarithm of.</param>
+ private static int Log2SoftwareFallback(uint value)
+ {
+ // No AggressiveInlining due to large method size
+ // The conventional contract 0->0 (Log(0) is undefined) holds without a branch: 0 stays 0 below and maps to table entry 0, which is 0
+
+ // Set every bit below the highest set bit, e.g. 00010010 becomes 00011111
+ value |= value >> 01;
+ value |= value >> 02;
+ value |= value >> 04;
+ value |= value >> 08;
+ value |= value >> 16;
+
+ // A 32-bit value shifted right by 27 is always in the range [0, 31], a valid index into the 32-entry table, so Unsafe.AddByteOffset is used to avoid the bounds check
+ return Unsafe.AddByteOffset(
+ ref MemoryMarshal.GetReference(Log2DeBruijn),
+ (IntPtr)(int)((value * 0x07C4ACDDu) >> 27)); // uint|long -> IntPtr cast on 32-bit platforms does expensive overflow checks not needed here
+ }
+#endif
}
}
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
index 4faf577fd..b530a37e7 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -532,6 +532,7 @@ namespace SixLabors.ImageSharp
///
/// Performs a multiplication and an addition of the .
///
+ /// ret = (vm0 * vm1) + va
/// The vector to add to the intermediate result.
/// The first vector to multiply.
/// The second vector to multiply.
@@ -552,6 +553,30 @@ namespace SixLabors.ImageSharp
}
}
+ /// <summary>
+ /// Performs a multiplication and a subtraction of the given vectors.
+ /// </summary>
+ /// <remarks>ret = (vm0 * vm1) - vs</remarks>
+ /// <param name="vs">The vector to subtract from the intermediate result.</param>
+ /// <param name="vm0">The first vector to multiply.</param>
+ /// <param name="vm1">The second vector to multiply.</param>
+ /// <returns>The resulting vector, (vm0 * vm1) - vs.</returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static Vector256 MultiplySubstract(
+ in Vector256 vs,
+ in Vector256 vm0,
+ in Vector256 vm1)
+ {
+ if (Fma.IsSupported)
+ {
+ return Fma.MultiplySubtract(vm1, vm0, vs); // fused form of (vm1 * vm0) - vs
+ }
+ else
+ {
+ return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); // separate multiply, then subtract, when FMA is unavailable
+ }
+ }
+
///
/// as many elements as possible, slicing them down (keeping the remainder).
///
diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
index 5cf54388d..b407ad221 100644
--- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
+++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
@@ -98,7 +98,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp
this.memoryAllocator = memoryAllocator;
this.bitsPerPixel = options.BitsPerPixel;
this.writeV4Header = options.SupportTransparency;
- this.quantizer = options.Quantizer ?? KnownQuantizers.Wu;
+ this.quantizer = options.Quantizer ?? KnownQuantizers.Octree;
}
///
diff --git a/src/ImageSharp/Formats/Gif/GifEncoderCore.cs b/src/ImageSharp/Formats/Gif/GifEncoderCore.cs
index 9c1e95285..585f87b3e 100644
--- a/src/ImageSharp/Formats/Gif/GifEncoderCore.cs
+++ b/src/ImageSharp/Formats/Gif/GifEncoderCore.cs
@@ -151,7 +151,7 @@ namespace SixLabors.ImageSharp.Formats.Gif
// since the palette is unchanging. This allows a reduction of memory usage across
// multi frame gifs using a global palette.
EuclideanPixelMap pixelMap = default;
- bool pixelMapSet = false;
+ bool pixelMapHasValue = false;
for (int i = 0; i < image.Frames.Count; i++)
{
ImageFrame frame = image.Frames[i];
@@ -166,17 +166,22 @@ namespace SixLabors.ImageSharp.Formats.Gif
}
else
{
- if (!pixelMapSet)
+ if (!pixelMapHasValue)
{
- pixelMapSet = true;
+ pixelMapHasValue = true;
pixelMap = new EuclideanPixelMap(this.configuration, quantized.Palette);
}
- using var paletteFrameQuantizer = new PaletteQuantizer(this.configuration, this.quantizer.Options, pixelMap);
+ using var paletteFrameQuantizer = new PaletteQuantizer(this.configuration, this.quantizer.Options, pixelMap, true);
using IndexedImageFrame paletteQuantized = paletteFrameQuantizer.QuantizeFrame(frame, frame.Bounds());
this.WriteImageData(paletteQuantized, stream);
}
}
+
+ if (pixelMapHasValue)
+ {
+ pixelMap.Dispose();
+ }
}
private void EncodeLocal(Image image, IndexedImageFrame quantized, Stream stream)
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
index 2d19f5ce2..8ca7b0c80 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
@@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
///
/// Represents a Jpeg block with coefficients.
///
- [StructLayout(LayoutKind.Sequential)]
+ [StructLayout(LayoutKind.Explicit)]
internal partial struct Block8x8F : IEquatable
{
///
@@ -27,29 +27,69 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public const int Size = 64;
#pragma warning disable SA1600 // ElementsMustBeDocumented
+ [FieldOffset(0)]
public Vector4 V0L;
+ [FieldOffset(16)]
public Vector4 V0R;
+ [FieldOffset(32)]
public Vector4 V1L;
+ [FieldOffset(48)]
public Vector4 V1R;
+ [FieldOffset(64)]
public Vector4 V2L;
+ [FieldOffset(80)]
public Vector4 V2R;
+ [FieldOffset(96)]
public Vector4 V3L;
+ [FieldOffset(112)]
public Vector4 V3R;
+ [FieldOffset(128)]
public Vector4 V4L;
+ [FieldOffset(144)]
public Vector4 V4R;
+ [FieldOffset(160)]
public Vector4 V5L;
+ [FieldOffset(176)]
public Vector4 V5R;
+ [FieldOffset(192)]
public Vector4 V6L;
+ [FieldOffset(208)]
public Vector4 V6R;
+ [FieldOffset(224)]
public Vector4 V7L;
+ [FieldOffset(240)]
public Vector4 V7R;
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+ /// <summary>
+ /// The number of rows of 8 scalar coefficients each in <see cref="Block8x8F"/>
+ /// </summary>
+ public const int RowCount = 8;
+
+ [FieldOffset(0)]
+ public Vector256 V0;
+ [FieldOffset(32)]
+ public Vector256 V1;
+ [FieldOffset(64)]
+ public Vector256 V2;
+ [FieldOffset(96)]
+ public Vector256 V3;
+ [FieldOffset(128)]
+ public Vector256 V4;
+ [FieldOffset(160)]
+ public Vector256 V5;
+ [FieldOffset(192)]
+ public Vector256 V6;
+ [FieldOffset(224)]
+ public Vector256 V7;
+#endif
#pragma warning restore SA1600 // ElementsMustBeDocumented
///
@@ -278,14 +318,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
if (Avx.IsSupported)
{
var valueVec = Vector256.Create(value);
- Unsafe.As>(ref this.V0L) = Avx.Multiply(Unsafe.As>(ref this.V0L), valueVec);
- Unsafe.As>(ref this.V1L) = Avx.Multiply(Unsafe.As>(ref this.V1L), valueVec);
- Unsafe.As>(ref this.V2L) = Avx.Multiply(Unsafe.As>(ref this.V2L), valueVec);
- Unsafe.As>(ref this.V3L) = Avx.Multiply(Unsafe.As>(ref this.V3L), valueVec);
- Unsafe.As>(ref this.V4L) = Avx.Multiply(Unsafe.As>(ref this.V4L), valueVec);
- Unsafe.As>(ref this.V5L) = Avx.Multiply(Unsafe.As>(ref this.V5L), valueVec);
- Unsafe.As>(ref this.V6L) = Avx.Multiply(Unsafe.As>(ref this.V6L), valueVec);
- Unsafe.As>(ref this.V7L) = Avx.Multiply(Unsafe.As>(ref this.V7L), valueVec);
+ this.V0 = Avx.Multiply(this.V0, valueVec);
+ this.V1 = Avx.Multiply(this.V1, valueVec);
+ this.V2 = Avx.Multiply(this.V2, valueVec);
+ this.V3 = Avx.Multiply(this.V3, valueVec);
+ this.V4 = Avx.Multiply(this.V4, valueVec);
+ this.V5 = Avx.Multiply(this.V5, valueVec);
+ this.V6 = Avx.Multiply(this.V6, valueVec);
+ this.V7 = Avx.Multiply(this.V7, valueVec);
}
else
#endif
@@ -319,45 +359,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
- Unsafe.As>(ref this.V0L)
- = Avx.Multiply(
- Unsafe.As>(ref this.V0L),
- Unsafe.As>(ref other.V0L));
-
- Unsafe.As>(ref this.V1L)
- = Avx.Multiply(
- Unsafe.As>(ref this.V1L),
- Unsafe.As>(ref other.V1L));
-
- Unsafe.As>(ref this.V2L)
- = Avx.Multiply(
- Unsafe.As>(ref this.V2L),
- Unsafe.As>(ref other.V2L));
-
- Unsafe.As>(ref this.V3L)
- = Avx.Multiply(
- Unsafe.As>(ref this.V3L),
- Unsafe.As>(ref other.V3L));
-
- Unsafe.As>(ref this.V4L)
- = Avx.Multiply(
- Unsafe.As>(ref this.V4L),
- Unsafe.As>(ref other.V4L));
-
- Unsafe.As>(ref this.V5L)
- = Avx.Multiply(
- Unsafe.As>(ref this.V5L),
- Unsafe.As>(ref other.V5L));
-
- Unsafe.As>(ref this.V6L)
- = Avx.Multiply(
- Unsafe.As>(ref this.V6L),
- Unsafe.As>(ref other.V6L));
-
- Unsafe.As>(ref this.V7L)
- = Avx.Multiply(
- Unsafe.As>(ref this.V7L),
- Unsafe.As>(ref other.V7L));
+ this.V0 = Avx.Multiply(this.V0, other.V0);
+ this.V1 = Avx.Multiply(this.V1, other.V1);
+ this.V2 = Avx.Multiply(this.V2, other.V2);
+ this.V3 = Avx.Multiply(this.V3, other.V3);
+ this.V4 = Avx.Multiply(this.V4, other.V4);
+ this.V5 = Avx.Multiply(this.V5, other.V5);
+ this.V6 = Avx.Multiply(this.V6, other.V6);
+ this.V7 = Avx.Multiply(this.V7, other.V7);
}
else
#endif
@@ -392,14 +401,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
if (Avx.IsSupported)
{
var valueVec = Vector256.Create(value);
- Unsafe.As>(ref this.V0L) = Avx.Add(Unsafe.As>(ref this.V0L), valueVec);
- Unsafe.As>(ref this.V1L) = Avx.Add(Unsafe.As>(ref this.V1L), valueVec);
- Unsafe.As>(ref this.V2L) = Avx.Add(Unsafe.As>(ref this.V2L), valueVec);
- Unsafe.As>(ref this.V3L) = Avx.Add(Unsafe.As>(ref this.V3L), valueVec);
- Unsafe.As>(ref this.V4L) = Avx.Add(Unsafe.As>(ref this.V4L), valueVec);
- Unsafe.As>(ref this.V5L) = Avx.Add(Unsafe.As>(ref this.V5L), valueVec);
- Unsafe.As>(ref this.V6L) = Avx.Add(Unsafe.As>(ref this.V6L), valueVec);
- Unsafe.As>(ref this.V7L) = Avx.Add(Unsafe.As>(ref this.V7L), valueVec);
+ this.V0 = Avx.Add(this.V0, valueVec);
+ this.V1 = Avx.Add(this.V1, valueVec);
+ this.V2 = Avx.Add(this.V2, valueVec);
+ this.V3 = Avx.Add(this.V3, valueVec);
+ this.V4 = Avx.Add(this.V4, valueVec);
+ this.V5 = Avx.Add(this.V5, valueVec);
+ this.V6 = Avx.Add(this.V6, valueVec);
+ this.V7 = Avx.Add(this.V7, valueVec);
}
else
#endif
@@ -468,81 +477,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
DivideRoundAll(ref dest, ref qt);
}
- ///
- /// Scales the 16x16 region represented by the 4 source blocks to the 8x8 DST block.
- ///
- /// The destination block.
- /// The source block.
- public static unsafe void Scale16X16To8X8(ref Block8x8F destination, ReadOnlySpan source)
- {
-#if SUPPORTS_RUNTIME_INTRINSICS
- if (Avx2.IsSupported)
- {
- Scale16X16To8X8Vectorized(ref destination, source);
- return;
- }
-#endif
-
- Scale16X16To8X8Scalar(ref destination, source);
- }
-
- private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnlySpan source)
- {
-#if SUPPORTS_RUNTIME_INTRINSICS
- Debug.Assert(Avx2.IsSupported, "AVX2 is required to execute this method");
-
- var f2 = Vector256.Create(2f);
- var f025 = Vector256.Create(0.25f);
- Vector256 switchInnerDoubleWords = Unsafe.As>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32));
- ref Vector256 destRef = ref Unsafe.As>(ref destination);
-
- for (int i = 0; i < 2; i++)
- {
- ref Vector256 in1 = ref Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 2 * i));
- ref Vector256 in2 = ref Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), (2 * i) + 1));
-
- for (int j = 0; j < 8; j += 2)
- {
- Vector256 a = Unsafe.Add(ref in1, j);
- Vector256 b = Unsafe.Add(ref in1, j + 1);
- Vector256 c = Unsafe.Add(ref in2, j);
- Vector256 d = Unsafe.Add(ref in2, j + 1);
-
- Vector256 calc1 = Avx.Shuffle(a, c, 0b10_00_10_00);
- Vector256 calc2 = Avx.Shuffle(a, c, 0b11_01_11_01);
- Vector256 calc3 = Avx.Shuffle(b, d, 0b10_00_10_00);
- Vector256 calc4 = Avx.Shuffle(b, d, 0b11_01_11_01);
-
- Vector256 sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4));
- Vector256 add = Avx.Add(sum, f2);
- Vector256 res = Avx.Multiply(add, f025);
-
- destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords);
- destRef = ref Unsafe.Add(ref destRef, 1);
- }
- }
-#endif
- }
-
- private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, ReadOnlySpan source)
- {
- for (int i = 0; i < 4; i++)
- {
- int dstOff = ((i & 2) << 4) | ((i & 1) << 2);
- Block8x8F iSource = source[i];
-
- for (int y = 0; y < 4; y++)
- {
- for (int x = 0; x < 4; x++)
- {
- int j = (16 * y) + (2 * x);
- float sum = iSource[j] + iSource[j + 1] + iSource[j + 8] + iSource[j + 9];
- destination[(8 * y) + x + dstOff] = (sum + 2) * .25F;
- }
- }
- }
- }
-
[MethodImpl(InliningOptions.ShortMethod)]
private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b)
{
@@ -553,19 +487,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
var vadd = Vector256.Create(.5F);
var vone = Vector256.Create(1f);
- ref Vector256 aBase = ref Unsafe.AsRef(Unsafe.As>(ref a.V0L));
- ref Vector256 bBase = ref Unsafe.AsRef(Unsafe.As>(ref b.V0L));
- ref Vector256 aEnd = ref Unsafe.Add(ref aBase, 8);
-
- do
+ for (int i = 0; i < RowCount; i++)
{
- Vector256 voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aBase), vone), vadd);
- Unsafe.Add(ref aBase, 0) = Avx.Add(Avx.Divide(aBase, bBase), voff);
-
- aBase = ref Unsafe.Add(ref aBase, 1);
- bBase = ref Unsafe.Add(ref bBase, 1);
+ ref Vector256 aRow = ref Unsafe.Add(ref a.V0, i);
+ ref Vector256 bRow = ref Unsafe.Add(ref b.V0, i);
+ Vector256 voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aRow), vone), vadd);
+ aRow = Avx.Add(Avx.Divide(aRow, bRow), voff);
}
- while (Unsafe.IsAddressLessThan(ref aBase, ref aEnd));
}
else
#endif
@@ -805,26 +733,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
Vector256 t0 = Avx.UnpackLow(r0, r1);
Vector256 t2 = Avx.UnpackLow(r2, r3);
Vector256 v = Avx.Shuffle(t0, t2, 0x4E);
- Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC);
- Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33);
+ d.V0 = Avx.Blend(t0, v, 0xCC);
+ d.V1 = Avx.Blend(t2, v, 0x33);
Vector256 t4 = Avx.UnpackLow(r4, r5);
Vector256 t6 = Avx.UnpackLow(r6, r7);
v = Avx.Shuffle(t4, t6, 0x4E);
- Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC);
- Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33);
+ d.V4 = Avx.Blend(t4, v, 0xCC);
+ d.V5 = Avx.Blend(t6, v, 0x33);
Vector256 t1 = Avx.UnpackHigh(r0, r1);
Vector256 t3 = Avx.UnpackHigh(r2, r3);
v = Avx.Shuffle(t1, t3, 0x4E);
- Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC);
- Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33);
+ d.V2 = Avx.Blend(t1, v, 0xCC);
+ d.V3 = Avx.Blend(t3, v, 0x33);
Vector256 t5 = Avx.UnpackHigh(r4, r5);
Vector256 t7 = Avx.UnpackHigh(r6, r7);
v = Avx.Shuffle(t5, t7, 0x4E);
- Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC);
- Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33);
+ d.V6 = Avx.Blend(t5, v, 0xCC);
+ d.V7 = Avx.Blend(t7, v, 0x33);
}
else
#endif
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs
index bc2c7634b..bc6c8c6cc 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs
@@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
@@ -44,7 +44,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
}
}
- this.Values = new uint[maxValue + 1];
+ this.Values = new int[maxValue + 1];
int code = 0;
int k = 0;
@@ -54,7 +54,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
int bits = (i + 1) << 24;
for (int j = 0; j < spec.Count[i]; j++)
{
- this.Values[spec.Values[k]] = (uint)(bits | code);
+ this.Values[spec.Values[k]] = bits | code;
code++;
k++;
}
@@ -66,6 +66,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
///
/// Gets the collection of huffman values.
///
- public uint[] Values { get; }
+ public int[] Values { get; }
}
-}
\ No newline at end of file
+}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
new file mode 100644
index 000000000..860a9c323
--- /dev/null
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
@@ -0,0 +1,427 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System.IO;
+using System.Runtime.CompilerServices;
+using System.Threading;
+using SixLabors.ImageSharp.Memory;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
+{
+ internal class HuffmanScanEncoder
+ {
+ ///
+ /// Number of bytes cached before being written to target stream via Stream.Write(byte[], offset, count).
+ ///
+ ///
+ /// This is subject to change, 1024 seems to be the best value in terms of performance.
+ /// <see cref="Emit(int, int)"/> expects it to be at least 8 (see comments in method body).
+ ///
+ private const int EmitBufferSizeInBytes = 1024;
+
+ ///
+ /// A buffer for reducing the number of stream writes when emitting Huffman-encoded data.
+ ///
+ private readonly byte[] emitBuffer = new byte[EmitBufferSizeInBytes];
+
+ ///
+ /// Number of filled bytes in buffer
+ ///
+ private int emitLen = 0;
+
+ ///
+ /// Emitted bits 'micro buffer' before being transferred to the <see cref="emitBuffer"/>.
+ ///
+ private int accumulatedBits;
+
+ ///
+ /// Number of jagged bits stored in <see cref="accumulatedBits"/>
+ ///
+ private int bitCount;
+
+ private Block8x8F temporalBlock1;
+ private Block8x8F temporalBlock2;
+
+ ///
+ /// The output stream that receives the encoded scan bytes.
+ ///
+ private readonly Stream target;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="HuffmanScanEncoder"/> class.
+        /// </summary>
+        /// <param name="outputStream">The stream that receives the encoded scan bytes.</param>
+        public HuffmanScanEncoder(Stream outputStream) => this.target = outputStream;
+
+ /// <summary>
+ /// Encodes the image with no subsampling.
+ /// Every 8x8 pixel block is written as one Y, one Cb and one Cr block.
+ /// </summary>
+ /// <typeparam name="TPixel">The pixel format.</typeparam>
+ /// <param name="pixels">The image to encode; only its root frame is read.</param>
+ /// <param name="luminanceQuantTable">Luminance quantization table provided by the caller</param>
+ /// <param name="chrominanceQuantTable">Chrominance quantization table provided by the caller</param>
+ /// <param name="cancellationToken">The token to monitor for cancellation.</param>
+ public void Encode444(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
+ where TPixel : unmanaged, IPixel
+ {
+ var unzig = ZigZag.CreateUnzigTable();
+
+ // Previous DC value per component: WriteBlock receives it and hands back the new one.
+ // ReSharper disable once InconsistentNaming
+ int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
+
+ ImageFrame frame = pixels.Frames.RootFrame;
+ Buffer2D pixelBuffer = frame.PixelBuffer;
+ RowOctet currentRows = default;
+
+ var pixelConverter = new YCbCrForwardConverter444(frame);
+
+ // Walk the image in steps of 8 rows; cancellation is checked once per step.
+ for (int y = 0; y < pixels.Height; y += 8)
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+ currentRows.Update(pixelBuffer, y);
+
+ for (int x = 0; x < pixels.Width; x += 8)
+ {
+ // Fills pixelConverter.Y / Cb / Cr for the block starting at (x, y).
+ pixelConverter.Convert(x, y, ref currentRows);
+
+ prevDCY = this.WriteBlock(
+ QuantIndex.Luminance,
+ prevDCY,
+ ref pixelConverter.Y,
+ ref luminanceQuantTable,
+ ref unzig);
+
+ prevDCCb = this.WriteBlock(
+ QuantIndex.Chrominance,
+ prevDCCb,
+ ref pixelConverter.Cb,
+ ref chrominanceQuantTable,
+ ref unzig);
+
+ prevDCCr = this.WriteBlock(
+ QuantIndex.Chrominance,
+ prevDCCr,
+ ref pixelConverter.Cr,
+ ref chrominanceQuantTable,
+ ref unzig);
+ }
+ }
+
+ // Pad the last byte and write whatever is still buffered.
+ this.FlushInternalBuffer();
+ }
+
+ /// <summary>
+ /// Encodes the image with subsampling. The Cb and Cr components are each subsampled
+ /// at a factor of 2 both horizontally and vertically.
+ /// </summary>
+ /// <typeparam name="TPixel">The pixel format.</typeparam>
+ /// <param name="pixels">The image to encode; only its root frame is read.</param>
+ /// <param name="luminanceQuantTable">Luminance quantization table provided by the caller</param>
+ /// <param name="chrominanceQuantTable">Chrominance quantization table provided by the caller</param>
+ /// <param name="cancellationToken">The token to monitor for cancellation.</param>
+ public void Encode420(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
+ where TPixel : unmanaged, IPixel
+ {
+ var unzig = ZigZag.CreateUnzigTable();
+
+ // Previous DC value per component: WriteBlock receives it and hands back the new one.
+ // ReSharper disable once InconsistentNaming
+ int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
+ ImageFrame frame = pixels.Frames.RootFrame;
+ Buffer2D pixelBuffer = frame.PixelBuffer;
+ RowOctet currentRows = default;
+
+ var pixelConverter = new YCbCrForwardConverter420(frame);
+
+ // Walk the image in 16x16 areas; cancellation is checked once per step of 16 rows.
+ for (int y = 0; y < pixels.Height; y += 16)
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+ for (int x = 0; x < pixels.Width; x += 16)
+ {
+ // Two passes per area: i = 0 covers rows y..y+7, i = 1 covers rows y+8..y+15.
+ // Each pass produces a left and a right luminance block.
+ for (int i = 0; i < 2; i++)
+ {
+ int yOff = i * 8;
+ currentRows.Update(pixelBuffer, y + yOff);
+
+ // The converter receives the top of the 16x16 area (y) together with the pass index i;
+ // currentRows already points at y + yOff.
+ pixelConverter.Convert(x, y, ref currentRows, i);
+
+ prevDCY = this.WriteBlock(
+ QuantIndex.Luminance,
+ prevDCY,
+ ref pixelConverter.YLeft,
+ ref luminanceQuantTable,
+ ref unzig);
+
+ prevDCY = this.WriteBlock(
+ QuantIndex.Luminance,
+ prevDCY,
+ ref pixelConverter.YRight,
+ ref luminanceQuantTable,
+ ref unzig);
+ }
+
+ // The chroma blocks are written once per 16x16 area, after both passes have run.
+ prevDCCb = this.WriteBlock(
+ QuantIndex.Chrominance,
+ prevDCCb,
+ ref pixelConverter.Cb,
+ ref chrominanceQuantTable,
+ ref unzig);
+
+ prevDCCr = this.WriteBlock(
+ QuantIndex.Chrominance,
+ prevDCCr,
+ ref pixelConverter.Cr,
+ ref chrominanceQuantTable,
+ ref unzig);
+ }
+ }
+
+ // Pad the last byte and write whatever is still buffered.
+ this.FlushInternalBuffer();
+ }
+
+ /// <summary>
+ /// Encodes the image with no chroma, just luminance.
+ /// </summary>
+ /// <typeparam name="TPixel">The pixel format.</typeparam>
+ /// <param name="pixels">The image to encode; only its root frame is read.</param>
+ /// <param name="luminanceQuantTable">Luminance quantization table provided by the caller</param>
+ /// <param name="cancellationToken">The token to monitor for cancellation.</param>
+ public void EncodeGrayscale(Image pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken)
+ where TPixel : unmanaged, IPixel
+ {
+ var unzig = ZigZag.CreateUnzigTable();
+
+ // Previous luminance DC value: WriteBlock receives it and hands back the new one.
+ // ReSharper disable once InconsistentNaming
+ int prevDCY = 0;
+
+ var pixelConverter = LuminanceForwardConverter.Create();
+ ImageFrame frame = pixels.Frames.RootFrame;
+ Buffer2D pixelBuffer = frame.PixelBuffer;
+ RowOctet currentRows = default;
+
+ // Walk the image in steps of 8 rows; cancellation is checked once per step.
+ for (int y = 0; y < pixels.Height; y += 8)
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+ currentRows.Update(pixelBuffer, y);
+
+ for (int x = 0; x < pixels.Width; x += 8)
+ {
+ // Fills pixelConverter.Y for the block starting at (x, y).
+ pixelConverter.Convert(frame, x, y, ref currentRows);
+
+ prevDCY = this.WriteBlock(
+ QuantIndex.Luminance,
+ prevDCY,
+ ref pixelConverter.Y,
+ ref luminanceQuantTable,
+ ref unzig);
+ }
+ }
+
+ // Pad the last byte and write whatever is still buffered.
+ this.FlushInternalBuffer();
+ }
+
+ /// <summary>
+ /// Writes a block of pixel data using the given quantization table,
+ /// returning the post-quantized DC value of the DCT-transformed block.
+ /// The block is in natural (not zig-zag) order.
+ /// </summary>
+ /// <param name="index">The quantization table index.</param>
+ /// <param name="prevDC">The previous DC value.</param>
+ /// <param name="src">Source block</param>
+ /// <param name="quant">Quantization table</param>
+ /// <param name="unZig">The 8x8 Unzig block.</param>
+ /// <returns>The quantized DC value of this block; callers pass it back as <paramref name="prevDC"/> on the next call.</returns>
+ private int WriteBlock(
+ QuantIndex index,
+ int prevDC,
+ ref Block8x8F src,
+ ref Block8x8F quant,
+ ref ZigZag unZig)
+ {
+ // Scratch blocks are instance fields that are reused for every call.
+ ref Block8x8F refTemp1 = ref this.temporalBlock1;
+ ref Block8x8F refTemp2 = ref this.temporalBlock2;
+
+ FastFloatingPointDCT.TransformFDCT(ref src, ref refTemp1, ref refTemp2);
+
+ // The quantized coefficients are read from refTemp2 below.
+ Block8x8F.Quantize(ref refTemp1, ref refTemp2, ref quant, ref unZig);
+
+ int dc = (int)refTemp2[0];
+
+ // Emit the DC delta.
+ // Huffman table (2 * index) is used for DC, table (2 * index) + 1 for AC.
+ this.EmitHuffRLE((2 * (int)index) + 0, 0, dc - prevDC);
+
+ // Emit the AC components.
+ int h = (2 * (int)index) + 1;
+ int runLength = 0;
+
+ for (int zig = 1; zig < Block8x8F.Size; zig++)
+ {
+ int ac = (int)refTemp2[zig];
+
+ if (ac == 0)
+ {
+ // Zeros are only counted; they are emitted as a run length with the next non-zero value.
+ runLength++;
+ }
+ else
+ {
+ // The run length shares a symbol with the value size and only has 4 bits,
+ // so every full run of 16 zeros is emitted as the dedicated symbol 0xf0.
+ while (runLength > 15)
+ {
+ this.EmitHuff(h, 0xf0);
+ runLength -= 16;
+ }
+
+ this.EmitHuffRLE(h, runLength, ac);
+ runLength = 0;
+ }
+ }
+
+ // Trailing zeros are not emitted one by one; symbol 0x00 ends the block instead.
+ if (runLength > 0)
+ {
+ this.EmitHuff(h, 0x00);
+ }
+
+ return dc;
+ }
+
+ /// <summary>
+ /// Emits the least significant <paramref name="count"/> bits of <paramref name="bits"/> to the stream write buffer.
+ /// The precondition is that bits is smaller than 2^count and that count is not greater than 16.
+ /// </summary>
+ /// <param name="bits">The packed bits.</param>
+ /// <param name="count">The number of bits</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private void Emit(int bits, int count)
+ {
+ // Append the new bits below the bits that are still pending from previous calls:
+ // pending bits occupy the top of the 32-bit accumulator, new bits are shifted right beneath them.
+ count += this.bitCount;
+ bits <<= 32 - count;
+ bits |= this.accumulatedBits;
+
+ // Only write once at least 8 bits are pending.
+ if (count >= 8)
+ {
+ // Move every complete byte from the top of the accumulator into the emit buffer.
+ while (count >= 8)
+ {
+ byte b = (byte)(bits >> 24);
+ this.emitBuffer[this.emitLen++] = b;
+
+ // A 0xFF data byte is always followed by a 0x00 byte (JPEG byte stuffing).
+ if (b == byte.MaxValue)
+ {
+ this.emitBuffer[this.emitLen++] = byte.MinValue;
+ }
+
+ bits <<= 8;
+ count -= 8;
+ }
+
+ // This can emit 4 times of:
+ // 1 byte guaranteed
+ // 1 extra byte.MinValue byte if previous one was byte.MaxValue
+ // Thus writing (1 + 1) * 4 = 8 bytes max
+ // So we must check if emit buffer has extra 8 bytes, if not - call stream.Write
+ if (this.emitLen > EmitBufferSizeInBytes - 8)
+ {
+ this.target.Write(this.emitBuffer, 0, this.emitLen);
+ this.emitLen = 0;
+ }
+ }
+
+ // Keep the remaining (fewer than 8) bits for the next call.
+ this.accumulatedBits = bits;
+ this.bitCount = count;
+ }
+
+        /// <summary>
+        /// Looks up the Huffman code for a value in the selected table and emits it.
+        /// </summary>
+        /// <param name="index">The index of the Huffman encoder</param>
+        /// <param name="value">The value to encode.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        private void EmitHuff(int index, int value)
+        {
+            // A table entry carries the code length above bit 24 and the code itself in the low 24 bits.
+            int packed = HuffmanLut.TheHuffmanLut[index].Values[value];
+            int code = packed & 0x00FFFFFF;
+            int codeLength = packed >> 24;
+
+            this.Emit(code, codeLength);
+        }
+
+        /// <summary>
+        /// Emits the Huffman symbol that combines a zero run length with the bit size of a value,
+        /// followed by the bits of the value itself.
+        /// </summary>
+        /// <param name="index">The index of the Huffman encoder</param>
+        /// <param name="runLength">The run length stored in the upper 4 bits of the symbol.</param>
+        /// <param name="value">The value to encode.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        private void EmitHuffRLE(int index, int runLength, int value)
+        {
+            // The bit size is taken from the magnitude; negative values are written as the low bits of (value - 1).
+            bool isNegative = value < 0;
+            int magnitude = isNegative ? -value : value;
+            int payload = isNegative ? value - 1 : value;
+
+            int bitLength = GetHuffmanEncodingLength((uint)magnitude);
+
+            this.EmitHuff(index, (runLength << 4) | bitLength);
+
+            // A zero value has no payload bits.
+            if (bitLength <= 0)
+            {
+                return;
+            }
+
+            int payloadMask = (1 << bitLength) - 1;
+            this.Emit(payload & payloadMask, bitLength);
+        }
+
+        /// <summary>
+        /// Writes remaining bytes from internal buffer to the target stream.
+        /// </summary>
+        /// <remarks>
+        /// Pads the last, partially filled byte with 1's. No padding is emitted when the
+        /// pending bits already end on a byte boundary.
+        /// </remarks>
+        private void FlushInternalBuffer()
+        {
+            // Only a partially filled byte needs padding. Computing `8 - (bitCount % 8)`
+            // unconditionally yields 8 (never 0) for a byte-aligned stream, which would append
+            // a spurious 0xFF byte plus its stuffed 0x00 to the scan data.
+            int pendingBits = this.bitCount % 8;
+            if (pendingBits != 0)
+            {
+                // pad last byte with 1's
+                int padBitsCount = 8 - pendingBits;
+                this.Emit((1 << padBitsCount) - 1, padBitsCount);
+            }
+
+            // flush remaining bytes
+            if (this.emitLen != 0)
+            {
+                this.target.Write(this.emitBuffer, 0, this.emitLen);
+
+                // Mark the buffer as empty, as Emit does after its own write,
+                // so the same bytes can never be written a second time.
+                this.emitLen = 0;
+            }
+        }
+
+ /// <summary>
+ /// Calculates the minimum number of bits needed to store the given value for Huffman jpeg encoding.
+ /// </summary>
+ /// <remarks>
+ /// This method returns 0 for input value 0. This is done specifically for huffman encoding
+ /// </remarks>
+ /// <param name="value">The value.</param>
+ /// <returns>The number of significant bits of <paramref name="value"/>; 0 for 0.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static int GetHuffmanEncodingLength(uint value)
+ {
+ // NOTE(review): the guard also admits value == (1 << 16), which needs 17 bits - confirm whether '<' was intended.
+ DebugGuard.IsTrue(value <= (1 << 16), "Huffman encoder is supposed to encode a value of 16bit size max");
+#if SUPPORTS_BITOPERATIONS
+ // This should have been implemented as (BitOperations.Log2(value) + 1) as in non-intrinsic implementation
+ // But internal log2 is implemented like this: (31 - (int)Lzcnt.LeadingZeroCount(value))
+
+ // BitOperations.Log2 implementation also checks if input value is zero for the convention 0->0
+ // Lzcnt would return 32 for input value of 0 - no need to check that with branching
+ // Fallback code if Lzcnt is not supported still uses an if-check
+ // But most modern CPUs support this instruction so this should not be a problem
+ return 32 - System.Numerics.BitOperations.LeadingZeroCount(value);
+#else
+ // Ideally:
+ // if 0 - return 0 in this case
+ // else - return log2(value) + 1
+ //
+ // Hack based on input value constraint:
+ // We know that input values are guaranteed to be maximum 16 bit large for huffman encoding
+ // We can safely shift input value for one bit -> log2(value << 1)
+ // Because of the 16 bit value constraint it won't overflow
+ // With that input value change we no longer need to add 1 before returning
+ // And this eliminates need to check if input value is zero - it is a standard convention which Log2SoftwareFallback adheres to
+ return Numerics.Log2(value << 1);
+#endif
+ }
+ }
+}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs
index cc81130dd..fc5b9a868 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs
@@ -49,7 +49,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
ref Block8x8F yBlock = ref this.Y;
ref L8 l8Start = ref l8Span[0];
- for (int i = 0; i < 64; i++)
+ for (int i = 0; i < Block8x8F.Size; i++)
{
ref L8 c = ref Unsafe.Add(ref l8Start, i);
yBlock[i] = c.PackedValue;
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs
index 3c1a02c5a..15574a32a 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs
@@ -92,48 +92,144 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
return tables;
}
- ///
- /// Optimized method to allocates the correct y, cb, and cr values to the DCT blocks from the given r, g, b values.
- ///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- private void ConvertPixelInto(
- int r,
- int g,
- int b,
- ref Block8x8F yResult,
- ref Block8x8F cbResult,
- ref Block8x8F crResult,
- int i)
+ private float CalculateY(byte r, byte g, byte b)
{
// float y = (0.299F * r) + (0.587F * g) + (0.114F * b);
- yResult[i] = (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits;
+ return (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits;
+ }
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private float CalculateCb(byte r, byte g, byte b)
+ {
// float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b));
- cbResult[i] = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits;
+ return (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits;
+ }
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private float CalculateCr(byte r, byte g, byte b)
+ {
// float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b));
- crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits;
+ return (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits;
}
- public void Convert(Span rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
+ ///
+ /// Converts Rgb24 pixels into YCbCr color space with 4:4:4 sampling of luminance and chroma.
+ ///
+ /// Span of Rgb24 pixel data
+ /// Resulting Y values block
+ /// Resulting Cb values block
+ /// Resulting Cr values block
+ public void Convert444(Span rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
{
ref Rgb24 rgbStart = ref rgbSpan[0];
- for (int i = 0; i < 64; i++)
+ for (int i = 0; i < Block8x8F.Size; i++)
{
- ref Rgb24 c = ref Unsafe.Add(ref rgbStart, i);
-
- this.ConvertPixelInto(
- c.R,
- c.G,
- c.B,
- ref yBlock,
- ref cbBlock,
- ref crBlock,
- i);
+ Rgb24 c = Unsafe.Add(ref rgbStart, i);
+
+ yBlock[i] = this.CalculateY(c.R, c.G, c.B);
+ cbBlock[i] = this.CalculateCb(c.R, c.G, c.B);
+ crBlock[i] = this.CalculateCr(c.R, c.G, c.B);
}
}
+ /// <summary>
+ /// Converts Rgb24 pixels into YCbCr color space with 4:2:0 subsampling of luminance and chroma.
+ /// </summary>
+ /// <remarks>Calculates 2 out of 4 luminance blocks and half of chroma blocks. This method must be called twice per 4x 8x8 DCT blocks with different row param.</remarks>
+ /// <param name="rgbSpan">Span of Rgb24 pixel data, read as 8 rows of 16 pixels</param>
+ /// <param name="yBlockLeft">First or "left" resulting Y block</param>
+ /// <param name="yBlockRight">Second or "right" resulting Y block</param>
+ /// <param name="cbBlock">Resulting Cb values block</param>
+ /// <param name="crBlock">Resulting Cr values block</param>
+ /// <param name="row">Row index of the 16x16 block, 0 or 1</param>
+ public void Convert420(Span rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row)
+ {
+ DebugGuard.MustBeBetweenOrEqualTo(row, 0, 1, nameof(row));
+
+ ref float yBlockLeftRef = ref Unsafe.As(ref yBlockLeft);
+ ref float yBlockRightRef = ref Unsafe.As(ref yBlockRight);
+
+ // 0-31 or 32-63
+ // upper or lower part
+ int chromaWriteOffset = row * (Block8x8F.Size / 2);
+ ref float cbBlockRef = ref Unsafe.Add(ref Unsafe.As(ref cbBlock), chromaWriteOffset);
+ ref float crBlockRef = ref Unsafe.Add(ref Unsafe.As(ref crBlock), chromaWriteOffset);
+
+ ref Rgb24 rgbStart = ref rgbSpan[0];
+
+ // Every iteration consumes two consecutive 16-pixel rows (i and i + 1).
+ for (int i = 0; i < 8; i += 2)
+ {
+ int yBlockWriteOffset = i * 8;
+ ref Rgb24 stride = ref Unsafe.Add(ref rgbStart, i * 16);
+
+ // Two source rows collapse into a single 8-value row of the chroma blocks.
+ int chromaOffset = 8 * (i / 2);
+
+ // left
+ // pixels 0-7 of both rows; chroma goes to columns 0-3
+ this.ConvertChunk420(
+ ref stride,
+ ref Unsafe.Add(ref yBlockLeftRef, yBlockWriteOffset),
+ ref Unsafe.Add(ref cbBlockRef, chromaOffset),
+ ref Unsafe.Add(ref crBlockRef, chromaOffset));
+
+ // right
+ // pixels 8-15 of both rows; chroma goes to columns 4-7
+ this.ConvertChunk420(
+ ref Unsafe.Add(ref stride, 8),
+ ref Unsafe.Add(ref yBlockRightRef, yBlockWriteOffset),
+ ref Unsafe.Add(ref cbBlockRef, chromaOffset + 4),
+ ref Unsafe.Add(ref crBlockRef, chromaOffset + 4));
+ }
+ }
+
+        /// <summary>
+        /// Converts an 8x2 pixel chunk: writes two rows of 8 luminance values and
+        /// 4 chroma values per component, each averaged over a 2x2 pixel quad.
+        /// </summary>
+        /// <param name="stride">First pixel of the upper row; the lower row starts 16 pixels further.</param>
+        /// <param name="yBlock">First luminance value of the upper destination row; the lower row starts 8 values further.</param>
+        /// <param name="cbBlock">First of the 4 destination Cb values.</param>
+        /// <param name="crBlock">First of the 4 destination Cr values.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private void ConvertChunk420(ref Rgb24 stride, ref float yBlock, ref float cbBlock, ref float crBlock)
+        {
+            // Source rows are 16 pixels wide, luminance rows 8 values wide.
+            const int SourceRowPixels = 16;
+            const int LuminanceRowValues = 8;
+
+            for (int column = 0; column < 8; column += 2)
+            {
+                // 2x2 pixel quad: two pixels of the upper row and the two pixels right below them
+                Rgb24 upperLeft = Unsafe.Add(ref stride, column);
+                Rgb24 upperRight = Unsafe.Add(ref stride, column + 1);
+                Rgb24 lowerLeft = Unsafe.Add(ref stride, column + SourceRowPixels);
+                Rgb24 lowerRight = Unsafe.Add(ref stride, column + SourceRowPixels + 1);
+
+                // luminance keeps the original resolution
+                ref float yUpperLeft = ref Unsafe.Add(ref yBlock, column);
+                yUpperLeft = this.CalculateY(upperLeft.R, upperLeft.G, upperLeft.B);
+                Unsafe.Add(ref yUpperLeft, 1) = this.CalculateY(upperRight.R, upperRight.G, upperRight.B);
+                Unsafe.Add(ref yUpperLeft, LuminanceRowValues) = this.CalculateY(lowerLeft.R, lowerLeft.G, lowerLeft.B);
+                Unsafe.Add(ref yUpperLeft, LuminanceRowValues + 1) = this.CalculateY(lowerRight.R, lowerRight.G, lowerRight.B);
+
+                // one averaged chroma sample per quad
+                int chromaIndex = column / 2;
+                Unsafe.Add(ref cbBlock, chromaIndex) = this.CalculateAverageCb(upperLeft, upperRight, lowerLeft, lowerRight);
+                Unsafe.Add(ref crBlock, chromaIndex) = this.CalculateAverageCr(upperLeft, upperRight, lowerLeft, lowerRight);
+            }
+        }
+
+        /// <summary>
+        /// Calculates the Cb value of four pixels and returns their average.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private float CalculateAverageCb(Rgb24 px0, Rgb24 px1, Rgb24 px2, Rgb24 px3)
+        {
+            float sum = this.CalculateCb(px0.R, px0.G, px0.B);
+            sum += this.CalculateCb(px1.R, px1.G, px1.B);
+            sum += this.CalculateCb(px2.R, px2.G, px2.B);
+            sum += this.CalculateCb(px3.R, px3.G, px3.B);
+
+            return 0.25f * sum;
+        }
+
+        /// <summary>
+        /// Calculates the Cr value of four pixels and returns their average.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private float CalculateAverageCr(Rgb24 px0, Rgb24 px1, Rgb24 px2, Rgb24 px3)
+        {
+            float sum = this.CalculateCr(px0.R, px0.G, px0.B);
+            sum += this.CalculateCr(px1.R, px1.G, px1.B);
+            sum += this.CalculateCr(px2.R, px2.G, px2.B);
+            sum += this.CalculateCr(px3.R, px3.G, px3.B);
+
+            return 0.25f * sum;
+        }
+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int Fix(float x)
=> (int)((x * (1L << ScaleBits)) + 0.5F);
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
index 209cc3c6a..9566ee862 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
@@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
@@ -27,19 +27,45 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
}
}
+        /// <summary>
+        /// Gets the number of padding bytes an rgb byte span needs at its end for the vectorized converter:
+        /// 8 when the converter is supported, 0 otherwise.
+        /// </summary>
+        /// <remarks>
+        /// The rgb data is a sequence of strides of 8 pixels, i.e. 3 * 8 = 24 bytes (192 bits) each.
+        /// Avx registers are 256 bits wide, so loading a stride also reads 64 bits of whatever follows it.
+        /// For every stride but the last those bits belong to the next stride. For the last stride of an
+        /// 8 stride (64 pixel, 1536 bit) span the load would reach bit 1600 and read foreign memory.
+        /// Appending 8 bytes of padding to the span solves this without extra code in the converters.
+        /// </remarks>
+        public static int AvxCompatibilityPadding
+        {
+            get
+            {
+#if SUPPORTS_RUNTIME_INTRINSICS
+                return IsSupported ? 8 : 0;
+#else
+                return 0;
+#endif
+            }
+        }
+
#if SUPPORTS_RUNTIME_INTRINSICS
+
private static ReadOnlySpan MoveFirst24BytesToSeparateLanes => new byte[]
{
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0,
3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0
};
- private static ReadOnlySpan MoveLast24BytesToSeparateLanes => new byte[]
- {
- 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0,
- 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0
- };
-
private static ReadOnlySpan ExtractRgb => new byte[]
{
0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF,
@@ -47,7 +73,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
};
#endif
- public static void Convert(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
+ ///
+ /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices with 4:4:4 subsampling
+ ///
+ /// Total size of rgb span must be 200 bytes
+ /// Span of rgb pixels with size of 64
+ /// 8x8 destination matrix of Luminance(Y) converted data
+ /// 8x8 destination matrix of Chrominance(Cb) converted data
+ /// 8x8 destination matrix of Chrominance(Cr) converted data
+ public static void Convert444(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
{
Debug.Assert(IsSupported, "AVX2 is required to run this converter");
@@ -63,18 +97,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
var f05 = Vector256.Create(0.5f);
var zero = Vector256.Create(0).AsByte();
- ref Vector256 inRef = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan));
- ref Vector256 destYRef = ref Unsafe.As>(ref yBlock);
- ref Vector256 destCbRef = ref Unsafe.As>(ref cbBlock);
- ref Vector256 destCrRef = ref Unsafe.As>(ref crBlock);
+ ref Vector256 rgbByteSpan = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan));
+ ref Vector256 destYRef = ref yBlock.V0;
+ ref Vector256 destCbRef = ref cbBlock.V0;
+ ref Vector256 destCrRef = ref crBlock.V0;
var extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb));
Vector256 rgb, rg, bx;
Vector256 r, g, b;
- for (int i = 0; i < 7; i++)
+
+ const int bytesPerRgbStride = 24;
+ for (int i = 0; i < 8; i++)
{
- rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)(24 * i)).AsUInt32(), extractToLanesMask).AsByte();
+ rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte();
rgb = Avx2.Shuffle(rgb, extractRgbMask);
@@ -94,27 +130,130 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
// 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r));
}
+#endif
+ }
+
+ ///
+ /// Converts 16x8 Rgb24 pixels matrix to 2 Y 8x8 matrices with 4:2:0 subsampling
+ ///
+ public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row)
+ {
+ Debug.Assert(IsSupported, "AVX2 is required to run this converter");
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+ var f0299 = Vector256.Create(0.299f);
+ var f0587 = Vector256.Create(0.587f);
+ var f0114 = Vector256.Create(0.114f);
+ var fn0168736 = Vector256.Create(-0.168736f);
+ var fn0331264 = Vector256.Create(-0.331264f);
+ var f128 = Vector256.Create(128f);
+ var fn0418688 = Vector256.Create(-0.418688f);
+ var fn0081312F = Vector256.Create(-0.081312F);
+ var f05 = Vector256.Create(0.5f);
+ var zero = Vector256.Create(0).AsByte();
+
+ ref Vector256 rgbByteSpan = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan));
+
+ int destOffset = row * 4;
- extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveLast24BytesToSeparateLanes));
- rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte();
- rgb = Avx2.Shuffle(rgb, extractRgbMask);
+ ref Vector256 destCbRef = ref Unsafe.Add(ref Unsafe.As>(ref cbBlock), destOffset);
+ ref Vector256 destCrRef = ref Unsafe.Add(ref Unsafe.As>(ref crBlock), destOffset);
- rg = Avx2.UnpackLow(rgb, zero);
- bx = Avx2.UnpackHigh(rgb, zero);
+ var extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
+ var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb));
+ Vector256 rgb, rg, bx;
+ Vector256 r, g, b;
+
+ Span> rDataLanes = stackalloc Vector256[4];
+ Span> gDataLanes = stackalloc Vector256[4];
+ Span> bDataLanes = stackalloc Vector256[4];
+
+ const int bytesPerRgbStride = 24;
+ for (int i = 0; i < 4; i++)
+ {
+ // 16x2 => 8x1
+ // left 8x8 column conversions
+ for (int j = 0; j < 4; j += 2)
+ {
+ rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * ((i * 4) + j))).AsUInt32(), extractToLanesMask).AsByte();
+
+ rgb = Avx2.Shuffle(rgb, extractRgbMask);
+
+ rg = Avx2.UnpackLow(rgb, zero);
+ bx = Avx2.UnpackHigh(rgb, zero);
+
+ r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
+ g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
+ b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());
+
+ int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1);
+
+ // (0.299F * r) + (0.587F * g) + (0.114F * b);
+ Unsafe.Add(ref yBlockLeft.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);
+
+ rDataLanes[j] = r;
+ gDataLanes[j] = g;
+ bDataLanes[j] = b;
+ }
+
+ // 16x2 => 8x1
+ // right 8x8 column conversions
+ for (int j = 1; j < 4; j += 2)
+ {
+ rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * ((i * 4) + j))).AsUInt32(), extractToLanesMask).AsByte();
+
+ rgb = Avx2.Shuffle(rgb, extractRgbMask);
+
+ rg = Avx2.UnpackLow(rgb, zero);
+ bx = Avx2.UnpackHigh(rgb, zero);
- r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
- g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
- b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());
+ r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
+ g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
+ b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());
- // (0.299F * r) + (0.587F * g) + (0.114F * b);
- Unsafe.Add(ref destYRef, 7) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);
+ int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1);
- // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
- Unsafe.Add(ref destCbRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r));
+ // (0.299F * r) + (0.587F * g) + (0.114F * b);
+ Unsafe.Add(ref yBlockRight.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);
- // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
- Unsafe.Add(ref destCrRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r));
+ rDataLanes[j] = r;
+ gDataLanes[j] = g;
+ bDataLanes[j] = b;
+ }
+
+ r = Scale16x2_8x1(rDataLanes);
+ g = Scale16x2_8x1(gDataLanes);
+ b = Scale16x2_8x1(bDataLanes);
+
+ // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
+ Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r));
+
+ // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
+ Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r));
+ }
#endif
}
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+ /// <summary>
+ /// Scales 16x2 matrix to 8x1 using 2x2 average
+ /// </summary>
+ /// <param name="v">Input matrix consisting of 4 256bit vectors</param>
+ /// <returns>256bit vector containing upper and lower scaled parts of the input matrix</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal static Vector256 Scale16x2_8x1(ReadOnlySpan> v)
+ {
+ Debug.Assert(Avx2.IsSupported, "AVX2 is required to run this converter");
+ DebugGuard.IsTrue(v.Length == 4, "Input span must consist of 4 elements");
+
+ var f025 = Vector256.Create(0.25f);
+
+ // Vertical part of the 2x2 sum: v[0] is added to v[2], v[1] to v[3], element by element.
+ Vector256 left = Avx.Add(v[0], v[2]);
+ Vector256 right = Avx.Add(v[1], v[3]);
+
+ // Horizontal part: adjacent elements are summed in pairs, then scaled by 1/4 to get the average.
+ // HorizontalAdd works per 128-bit lane, so the sums of `left` and `right` come out interleaved.
+ Vector256 avg2x2 = Avx.Multiply(Avx.HorizontalAdd(left, right), f025);
+
+ // Swap the two middle 64-bit chunks so that all values from `left` precede those from `right`.
+ return Avx2.Permute4x64(avg2x2.AsDouble(), 0b11_01_10_00).AsSingle();
+ }
+#endif
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs
new file mode 100644
index 000000000..a4abd532b
--- /dev/null
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs
@@ -0,0 +1,121 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.Advanced;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
+{
+ /// <summary>
+ /// On-stack worker struct to efficiently encapsulate the TPixel -> Rgb24 -> YCbCr conversion chain for 4:2:0 subsampling.
+ /// A single <see cref="Convert"/> call processes a 16x8 pixel area.
+ /// </summary>
+ /// <typeparam name="TPixel">The pixel type to work on</typeparam>
+ internal ref struct YCbCrForwardConverter420
+ where TPixel : unmanaged, IPixel
+ {
+ /// <summary>
+ /// Number of pixels processed per single <see cref="Convert"/> call
+ /// </summary>
+ private const int PixelsPerSample = 16 * 8;
+
+ /// <summary>
+ /// Total byte size of processed pixels converted from TPixel to <see cref="Rgb24"/>
+ /// </summary>
+ private const int RgbSpanByteSize = PixelsPerSample * 3;
+
+ /// <summary>
+ /// <see cref="Size"/> of sampling area from given frame pixel buffer
+ /// </summary>
+ private static readonly Size SampleSize = new Size(16, 8);
+
+ /// <summary>
+ /// The left Y component
+ /// </summary>
+ public Block8x8F YLeft;
+
+ /// <summary>
+ /// The right Y component
+ /// </summary>
+ public Block8x8F YRight;
+
+ /// <summary>
+ /// The Cb component
+ /// </summary>
+ public Block8x8F Cb;
+
+ /// <summary>
+ /// The Cr component
+ /// </summary>
+ public Block8x8F Cr;
+
+ /// <summary>
+ /// The color conversion tables; only created when the vectorized converter is not supported
+ /// </summary>
+ private RgbToYCbCrConverterLut colorTables;
+
+ /// <summary>
+ /// Temporal 16x8 block to hold TPixel data
+ /// </summary>
+ private Span pixelSpan;
+
+ /// <summary>
+ /// Temporal RGB block
+ /// </summary>
+ private Span rgbSpan;
+
+ /// <summary>
+ /// Sampled pixel buffer size
+ /// </summary>
+ private Size samplingAreaSize;
+
+ /// <summary>
+ /// <see cref="Configuration"/> for internal operations
+ /// </summary>
+ private Configuration config;
+
+ /// <summary>
+ /// Initializes the converter for the given frame and allocates its temporal buffers.
+ /// </summary>
+ /// <param name="frame">The frame whose size and configuration are used by the conversion.</param>
+ public YCbCrForwardConverter420(ImageFrame frame)
+ {
+ // matrices would be filled during convert calls
+ this.YLeft = default;
+ this.YRight = default;
+ this.Cb = default;
+ this.Cr = default;
+
+ // temporal pixel buffers
+ // the rgb buffer is extended by the padding the vectorized converter asks for
+ this.pixelSpan = new TPixel[PixelsPerSample].AsSpan();
+ this.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxCompatibilityPadding].AsSpan());
+
+ // frame data
+ this.samplingAreaSize = new Size(frame.Width, frame.Height);
+ this.config = frame.GetConfiguration();
+
+ // conversion vector fallback data
+ if (!RgbToYCbCrConverterVectorized.IsSupported)
+ {
+ this.colorTables = RgbToYCbCrConverterLut.Create();
+ }
+ else
+ {
+ this.colorTables = default;
+ }
+ }
+
+ /// <summary>
+ /// Loads a 16x8 pixel area, converts it to Rgb24 and then to YCbCr,
+ /// writing <see cref="YLeft"/>, <see cref="YRight"/>, <see cref="Cb"/> and <see cref="Cr"/>.
+ /// </summary>
+ /// <param name="x">The x coordinate passed on as the start point of the sampled area.</param>
+ /// <param name="y">The y coordinate passed on as the start point of the sampled area.</param>
+ /// <param name="currentRows">The rows the pixels are loaded from.</param>
+ /// <param name="idx">Passed to the converters as the row index of the 16x16 block, 0 or 1.</param>
+ public void Convert(int x, int y, ref RowOctet currentRows, int idx)
+ {
+ YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), SampleSize, this.samplingAreaSize);
+
+ PixelOperations.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan);
+
+ // the lookup table converter is only the fallback for the vectorized one
+ if (RgbToYCbCrConverterVectorized.IsSupported)
+ {
+ RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx);
+ }
+ else
+ {
+ this.colorTables.Convert420(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx);
+ }
+ }
+ }
+}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs
new file mode 100644
index 000000000..ef589272b
--- /dev/null
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs
@@ -0,0 +1,122 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.Advanced;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
+{
+ /// <summary>
+ /// On-stack worker struct encapsulating the TPixel to Rgb24 to YCbCr conversion chain for 8x8 pixel blocks.
+ /// </summary>
+ /// <typeparam name="TPixel">The pixel type to work on.</typeparam>
+ internal ref struct YCbCrForwardConverter444
+ where TPixel : unmanaged, IPixel
+ {
+ /// <summary>
+ /// Number of pixels processed per single <c>Convert</c> call.
+ /// </summary>
+ private const int PixelsPerSample = 8 * 8;
+
+ /// <summary>
+ /// Total byte size of the processed pixels after conversion to Rgb24 (three bytes per pixel).
+ /// </summary>
+ private const int RgbSpanByteSize = PixelsPerSample * 3;
+
+ /// <summary>
+ /// Size of the area sampled from the frame pixel buffer on each call.
+ /// </summary>
+ private static readonly Size SampleSize = new Size(8, 8);
+
+ /// <summary>
+ /// The Y component block.
+ /// </summary>
+ public Block8x8F Y;
+
+ /// <summary>
+ /// The Cb component block.
+ /// </summary>
+ public Block8x8F Cb;
+
+ /// <summary>
+ /// The Cr component block.
+ /// </summary>
+ public Block8x8F Cr;
+
+ /// <summary>
+ /// Color conversion lookup tables; only created when the vectorized converter is not supported.
+ /// </summary>
+ private RgbToYCbCrConverterLut colorTables;
+
+ /// <summary>
+ /// Temporary span of 64 pixels holding the unconverted TPixel data.
+ /// </summary>
+ private Span pixelSpan;
+
+ /// <summary>
+ /// Temporary span receiving the Rgb24 conversion of the pixel span.
+ /// </summary>
+ private Span rgbSpan;
+
+ /// <summary>
+ /// Size of the frame that samples are taken from.
+ /// </summary>
+ private Size samplingAreaSize;
+
+ /// <summary>
+ /// Configuration used for internal operations.
+ /// </summary>
+ private Configuration config;
+
+ /// <summary>
+ /// Creates a converter for <paramref name="frame"/>: resets the output blocks, allocates the
+ /// reusable scratch buffers, and captures the frame size and configuration.
+ /// </summary>
+ /// <param name="frame">The frame whose width, height and configuration are used by <c>Convert</c>.</param>
+ public YCbCrForwardConverter444(ImageFrame frame)
+ {
+ // Output blocks start out empty; they are filled during Convert calls.
+ this.Y = default;
+ this.Cb = default;
+ this.Cr = default;
+
+ // Scratch buffers. The RGB buffer is allocated as a byte array that is
+ // AvxCompatibilityPadding bytes longer than RgbSpanByteSize before being reinterpreted.
+ var pixelBuffer = new TPixel[PixelsPerSample];
+ var rgbBytes = new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxCompatibilityPadding];
+ this.pixelSpan = pixelBuffer.AsSpan();
+ this.rgbSpan = MemoryMarshal.Cast(rgbBytes.AsSpan());
+
+ // Frame-derived state.
+ this.samplingAreaSize = new Size(frame.Width, frame.Height);
+ this.config = frame.GetConfiguration();
+
+ // The lookup tables are only read by the non-vectorized path of Convert.
+ this.colorTables = RgbToYCbCrConverterVectorized.IsSupported
+ ? default
+ : RgbToYCbCrConverterLut.Create();
+ }
+
+ /// <summary>
+ /// Converts the 8x8 image area at position (<paramref name="x"/>, <paramref name="y"/>) and places the
+ /// result in the <see cref="Y"/>, <see cref="Cb"/> and <see cref="Cr"/> members of this structure.
+ /// </summary>
+ /// <param name="x">X position of the sample area.</param>
+ /// <param name="y">Y position of the sample area.</param>
+ /// <param name="currentRows">The rows the sample area is read from.</param>
+ public void Convert(int x, int y, ref RowOctet currentRows)
+ {
+ var origin = new Point(x, y);
+ YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, origin, SampleSize, this.samplingAreaSize);
+
+ PixelOperations.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan);
+
+ // Fallback: use the lookup tables when the vectorized converter is unavailable.
+ if (!RgbToYCbCrConverterVectorized.IsSupported)
+ {
+ this.colorTables.Convert444(this.rgbSpan, ref this.Y, ref this.Cb, ref this.Cr);
+ return;
+ }
+
+ RgbToYCbCrConverterVectorized.Convert444(this.rgbSpan, ref this.Y, ref this.Cb, ref this.Cr);
+ }
+ }
+}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs
index 81e64b277..f5ef77091 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs
@@ -2,81 +2,59 @@
// Licensed under the Apache License, Version 2.0.
using System;
-using SixLabors.ImageSharp.Advanced;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
- ///
- /// On-stack worker struct to efficiently encapsulate the TPixel -> Rgb24 -> YCbCr conversion chain of 8x8 pixel blocks.
- ///
- /// The pixel type to work on
- internal ref struct YCbCrForwardConverter
+ /// <summary>
+ /// Shared helper for the forward (encoder) YCbCr converters: loads a sample area of pixels
+ /// into a scratch buffer, repeating edge pixels where the area reaches past the image.
+ /// </summary>
+ /// <typeparam name="TPixel">The pixel type to work on.</typeparam>
+ internal static class YCbCrForwardConverter
where TPixel : unmanaged, IPixel
{
- ///
- /// The Y component
- ///
- public Block8x8F Y;
-
- ///
- /// The Cb component
- ///
- public Block8x8F Cb;
-
- ///
- /// The Cr component
- ///
- public Block8x8F Cr;
+ /// <summary>
+ /// Copies a <paramref name="sampleSize"/> area of pixels starting at <paramref name="start"/> into
+ /// <paramref name="dest"/>. When the area extends past <paramref name="totalSize"/>, the last available
+ /// pixel of each row is repeated to the right and the last available row is repeated downwards.
+ /// </summary>
+ /// <param name="source">Rows to read from; row <c>y</c> of the sample is taken from <c>source[y]</c>.</param>
+ /// <param name="dest">Destination buffer, written as <c>sampleSize.Height</c> rows of <c>sampleSize.Width</c> pixels.</param>
+ /// <param name="start">Position of the sample's first pixel; X indexes into each source row, and both coordinates are checked against <paramref name="totalSize"/>.</param>
+ /// <param name="sampleSize">Size of the sample area to fill.</param>
+ /// <param name="totalSize">Size of the whole pixel area the sample is clipped against.</param>
+ public static void LoadAndStretchEdges(RowOctet source, Span dest, Point start, Size sampleSize, Size totalSize)
+ {
+ // The first sample starts at (0, 0), so zero is a valid coordinate on both axes.
+ DebugGuard.MustBeBetweenOrEqualTo(start.X, 0, totalSize.Width - 1, nameof(start.X));
+ DebugGuard.MustBeBetweenOrEqualTo(start.Y, 0, totalSize.Height - 1, nameof(start.Y));
- ///
- /// The color conversion tables
- ///
- private RgbToYCbCrConverterLut colorTables;
+ // Number of pixels/rows that can actually be read before hitting the right/bottom edge.
+ int width = Math.Min(sampleSize.Width, totalSize.Width - start.X);
+ int height = Math.Min(sampleSize.Height, totalSize.Height - start.Y);
- ///
- /// Temporal 8x8 block to hold TPixel data
- ///
- private GenericBlock8x8 pixelBlock;
+ uint byteWidth = (uint)(width * Unsafe.SizeOf());
+ int remainderXCount = sampleSize.Width - width;
- ///
- /// Temporal RGB block
- ///
- private GenericBlock8x8 rgbBlock;
+ ref byte blockStart = ref MemoryMarshal.GetReference(MemoryMarshal.Cast(dest));
+ int rowSizeInBytes = sampleSize.Width * Unsafe.SizeOf();
- public static YCbCrForwardConverter Create()
- {
- var result = default(YCbCrForwardConverter);
- if (!RgbToYCbCrConverterVectorized.IsSupported)
+ for (int y = 0; y < height; y++)
{
- // Avoid creating lookup tables, when vectorized converter is supported
- result.colorTables = RgbToYCbCrConverterLut.Create();
- }
+ Span row = source[y];
- return result;
- }
+ ref byte s = ref Unsafe.As(ref row[start.X]);
+ ref byte d = ref Unsafe.Add(ref blockStart, y * rowSizeInBytes);
- ///
- /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , )
- ///
- public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows)
- {
- this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows);
+ Unsafe.CopyBlock(ref d, ref s, byteWidth);
+
+ // Repeat the last copied pixel into the remaining columns of this row.
+ ref TPixel last = ref Unsafe.Add(ref Unsafe.As(ref d), width - 1);
- Span rgbSpan = this.rgbBlock.AsSpanUnsafe();
- PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), rgbSpan);
+ for (int x = 1; x <= remainderXCount; x++)
+ {
+ Unsafe.Add(ref last, x) = last;
+ }
+ }
- ref Block8x8F yBlock = ref this.Y;
- ref Block8x8F cbBlock = ref this.Cb;
- ref Block8x8F crBlock = ref this.Cr;
+ int remainderYCount = sampleSize.Height - height;
- if (RgbToYCbCrConverterVectorized.IsSupported)
+ if (remainderYCount == 0)
{
- RgbToYCbCrConverterVectorized.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock);
+ return;
}
- else
+
+ // Repeat the last filled row into the remaining rows of the destination.
+ ref byte lastRowStart = ref Unsafe.Add(ref blockStart, (height - 1) * rowSizeInBytes);
+
+ for (int y = 1; y <= remainderYCount; y++)
{
- this.colorTables.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock);
+ ref byte remStart = ref Unsafe.Add(ref lastRowStart, rowSizeInBytes * y);
+ Unsafe.CopyBlock(ref remStart, ref lastRowStart, (uint)rowSizeInBytes);
}
}
}
diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
index a6d0622dd..0f569b5da 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
@@ -1,8 +1,13 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
+using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
// ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
@@ -10,7 +15,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
///
/// Contains inaccurate, but fast forward and inverse DCT implementations.
///
- internal static class FastFloatingPointDCT
+ internal static partial class FastFloatingPointDCT
{
#pragma warning disable SA1310 // FieldNamesMustNotContainUnderscore
private const float C_1_175876 = 1.175875602f;
@@ -38,147 +43,31 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
private const float C_0_765367 = 0.765366865f;
private const float C_0_125 = 0.1250f;
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private static readonly Vector256 C_V_0_5411 = Vector256.Create(0.541196f);
+ private static readonly Vector256 C_V_1_3065 = Vector256.Create(1.306563f);
+ private static readonly Vector256 C_V_1_1758 = Vector256.Create(1.175876f);
+ private static readonly Vector256 C_V_0_7856 = Vector256.Create(0.785695f);
+ private static readonly Vector256 C_V_1_3870 = Vector256.Create(1.387040f);
+ private static readonly Vector256 C_V_0_2758 = Vector256.Create(0.275899f);
+
+ private static readonly Vector256 C_V_n1_9615 = Vector256.Create(-1.961570560f);
+ private static readonly Vector256 C_V_n0_3901 = Vector256.Create(-0.390180644f);
+ private static readonly Vector256 C_V_n0_8999 = Vector256.Create(-0.899976223f);
+ private static readonly Vector256 C_V_n2_5629 = Vector256.Create(-2.562915447f);
+ private static readonly Vector256 C_V_0_2986 = Vector256.Create(0.298631336f);
+ private static readonly Vector256 C_V_2_0531 = Vector256.Create(2.053119869f);
+ private static readonly Vector256