diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs
index 179f9aa287..c2a95989d3 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs
@@ -29,7 +29,50 @@ internal partial struct Block8x8F
return;
}
- // TODO: Optimize: implement all cases with scale-specific, loopless code!
+ if (horizontalScale == 2 && verticalScale == 1)
+ {
+ this.CopyTo2x1Scale(ref areaOrigin, (uint)areaStride);
+ return;
+ }
+
+ if (horizontalScale == 1 && verticalScale == 2)
+ {
+ this.CopyTo1x2Scale(ref areaOrigin, (uint)areaStride);
+ return;
+ }
+
+ if (horizontalScale == 4 && verticalScale == 1)
+ {
+ this.CopyTo4x1Scale(ref areaOrigin, (uint)areaStride);
+ return;
+ }
+
+ if (horizontalScale == 4 && verticalScale == 2)
+ {
+ this.CopyTo4x2Scale(ref areaOrigin, (uint)areaStride);
+ return;
+ }
+
+ if (horizontalScale == 1 && verticalScale == 4)
+ {
+ this.CopyTo1x4Scale(ref areaOrigin, (uint)areaStride);
+ return;
+ }
+
+ if (horizontalScale == 2 && verticalScale == 4)
+ {
+ this.CopyTo2x4Scale(ref areaOrigin, (uint)areaStride);
+ return;
+ }
+
+ if (horizontalScale == 4 && verticalScale == 4)
+ {
+ this.CopyTo4x4Scale(ref areaOrigin, (uint)areaStride);
+ return;
+ }
+
+ // The common 1x, 2x, and 4x integral scales are specialized above.
+ // Uncommon legal factor-3 scales use the generic fallback.
this.CopyArbitraryScale(ref areaOrigin, (uint)areaStride, (uint)horizontalScale, (uint)verticalScale);
}
@@ -85,6 +128,285 @@ internal partial struct Block8x8F
}
}
+ /// <summary>
+ /// Copies the full 8x8 block into the destination buffer while doubling only the horizontal axis: each of the 8 source rows is widened to 16 samples and written to the destination row with the same index.
+ /// </summary>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private void CopyTo2x1Scale(ref float areaOrigin, uint areaStride)
+ {
+ ref Vector4 sourceBase = ref this.V0L;
+ // Unrolled by hand (no loop); the two literals in each call are the source row and the destination row.
+ WidenRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 2u, 2u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 3u, 3u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 4u, 4u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 5u, 5u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 6u, 6u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 7u, 7u, areaStride);
+ }
+
+ /// <summary>
+ /// Copies the full 8x8 block into the destination buffer while doubling only the vertical axis: each 8-sample source row is copied unchanged to two consecutive destination rows (16 rows in total).
+ /// </summary>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private void CopyTo1x2Scale(ref float areaOrigin, uint areaStride)
+ {
+ ref Vector4 sourceBase = ref this.V0L;
+ // Unrolled by hand (no loop); the two literals in each call are the source row and the destination row.
+ CopyRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 2u, 4u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 2u, 5u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 3u, 6u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 3u, 7u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 4u, 8u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 4u, 9u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 5u, 10u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 5u, 11u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 6u, 12u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 6u, 13u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 7u, 14u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 7u, 15u, areaStride);
+ }
+
+ /// <summary>
+ /// Copies the full 8x8 block into the destination buffer while quadrupling only the horizontal axis: each of the 8 source rows is expanded to 32 samples and written to the destination row with the same index.
+ /// </summary>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private void CopyTo4x1Scale(ref float areaOrigin, uint areaStride)
+ {
+ ref Vector4 sourceBase = ref this.V0L;
+ // Unrolled by hand (no loop); the two literals in each call are the source row and the destination row.
+ ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 2u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 3u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 4u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 5u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 6u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 7u, areaStride);
+ }
+
+ /// <summary>
+ /// Copies the full 8x8 block into the destination buffer while quadrupling horizontally and doubling vertically: each source row is expanded to 32 samples and written to two consecutive destination rows (16 rows in total).
+ /// </summary>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private void CopyTo4x2Scale(ref float areaOrigin, uint areaStride)
+ {
+ ref Vector4 sourceBase = ref this.V0L;
+ // Unrolled by hand (no loop); the two literals in each call are the source row and the destination row.
+ ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 4u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 5u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 6u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 7u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 8u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 9u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 10u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 11u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 12u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 13u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 14u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 15u, areaStride);
+ }
+
+ /// <summary>
+ /// Copies the full 8x8 block into the destination buffer while quadrupling only the vertical axis: each 8-sample source row is copied unchanged to four consecutive destination rows (32 rows in total).
+ /// </summary>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private void CopyTo1x4Scale(ref float areaOrigin, uint areaStride)
+ {
+ ref Vector4 sourceBase = ref this.V0L;
+ // Unrolled by hand (no loop); the two literals in each call are the source row and the destination row.
+ CopyRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 3u, 14u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 4u, 16u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 4u, 17u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 4u, 18u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 4u, 19u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 5u, 20u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 5u, 21u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 5u, 22u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 5u, 23u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 6u, 24u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 6u, 25u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 6u, 26u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 6u, 27u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 7u, 28u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 7u, 29u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 7u, 30u, areaStride);
+ CopyRow8(ref sourceBase, ref areaOrigin, 7u, 31u, areaStride);
+ }
+
+ /// <summary>
+ /// Copies the full 8x8 block into the destination buffer while doubling horizontally and quadrupling vertically: each source row is widened to 16 samples and written to four consecutive destination rows (32 rows in total).
+ /// </summary>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private void CopyTo2x4Scale(ref float areaOrigin, uint areaStride)
+ {
+ ref Vector4 sourceBase = ref this.V0L;
+ // Unrolled by hand (no loop); the two literals in each call are the source row and the destination row.
+ WidenRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 3u, 14u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 4u, 16u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 4u, 17u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 4u, 18u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 4u, 19u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 5u, 20u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 5u, 21u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 5u, 22u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 5u, 23u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 6u, 24u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 6u, 25u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 6u, 26u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 6u, 27u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 7u, 28u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 7u, 29u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 7u, 30u, areaStride);
+ WidenRow8(ref sourceBase, ref areaOrigin, 7u, 31u, areaStride);
+ }
+
+ /// <summary>
+ /// Copies the full 8x8 block into the destination buffer while quadrupling both axes: each source row is expanded to 32 samples and written to four consecutive destination rows (32 rows in total).
+ /// </summary>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private void CopyTo4x4Scale(ref float areaOrigin, uint areaStride)
+ {
+ ref Vector4 sourceBase = ref this.V0L;
+ // Unrolled by hand (no loop); the two literals in each call are the source row and the destination row.
+ ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 14u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 16u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 17u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 18u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 19u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 20u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 21u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 22u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 23u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 24u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 25u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 26u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 27u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 28u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 29u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 30u, areaStride);
+ ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 31u, areaStride);
+ }
+
+ /// <summary>
+ /// Copies one eight-sample source row (two <see cref="Vector4"/> values starting at vector index sourceRow * 2) to the destination row that begins destRow * areaStride floats past <paramref name="areaOrigin"/>.
+ /// </summary>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private static void CopyRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride)
+ {
+ ref Vector4 source = ref Unsafe.Add(ref sourceBase, sourceRow * 2u);
+ // Unsafe.As needs explicit <TFrom, TTo>: the target type cannot be inferred from a ref float argument.
+ ref Vector4 dest = ref Unsafe.As<float, Vector4>(ref Unsafe.Add(ref areaOrigin, destRow * areaStride));
+ dest = source;
+ Unsafe.Add(ref dest, 1u) = Unsafe.Add(ref source, 1u);
+ }
+
+ /// <summary>
+ /// Expands one eight-sample row to sixteen samples by duplicating each source value horizontally.
+ /// </summary>
+ /// <param name="sourceBase">Reference to the first vector of the source block; every row spans two vectors.</param>
+ /// <param name="areaOrigin">Reference to the first sample of the destination area.</param>
+ /// <param name="sourceRow">Index of the source row to read.</param>
+ /// <param name="destRow">Index of the destination row to write.</param>
+ /// <param name="areaStride">Distance, in floats, between the starts of consecutive destination rows.</param>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private static void WidenRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride)
+ {
+ // The row is read as two vectors: samples 0-3 (left) and samples 4-7 (right).
+ ref Vector4 sourceLeft = ref Unsafe.Add(ref sourceBase, sourceRow * 2u);
+ ref Vector4 sourceRight = ref Unsafe.Add(ref sourceLeft, 1u);
+
+ // Unsafe.As needs explicit <TFrom, TTo>: the target type cannot be inferred
+ // from a ref float argument, so the call does not compile without them.
+ ref Vector4 dest = ref Unsafe.As<float, Vector4>(ref Unsafe.Add(ref areaOrigin, destRow * areaStride));
+
+ // Duplicate each sample once: (a, b, c, d) becomes (a, a, b, b) followed by (c, c, d, d).
+ Vector4 xyLeft = new(sourceLeft.X, sourceLeft.X, sourceLeft.Y, sourceLeft.Y);
+ Vector4 zwLeft = new(sourceLeft.Z, sourceLeft.Z, sourceLeft.W, sourceLeft.W);
+ Vector4 xyRight = new(sourceRight.X, sourceRight.X, sourceRight.Y, sourceRight.Y);
+ Vector4 zwRight = new(sourceRight.Z, sourceRight.Z, sourceRight.W, sourceRight.W);
+
+ // Sixteen floats are written: four consecutive vectors starting at the destination row.
+ dest = xyLeft;
+ Unsafe.Add(ref dest, 1u) = zwLeft;
+ Unsafe.Add(ref dest, 2u) = xyRight;
+ Unsafe.Add(ref dest, 3u) = zwRight;
+ }
+
+ /// <summary>
+ /// Expands one eight-sample row to thirty-two samples by duplicating each source value four times horizontally; the row is written starting destRow * areaStride floats past <paramref name="areaOrigin"/>.
+ /// </summary>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private static void ExpandRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride)
+ {
+ ref Vector4 sourceLeft = ref Unsafe.Add(ref sourceBase, sourceRow * 2u);
+ ref Vector4 sourceRight = ref Unsafe.Add(ref sourceLeft, 1u);
+ ref Vector4 dest = ref Unsafe.As<float, Vector4>(ref Unsafe.Add(ref areaOrigin, destRow * areaStride));
+ // Unsafe.As needs explicit <TFrom, TTo> above. Each source sample is broadcast into one whole destination vector.
+ dest = new Vector4(sourceLeft.X);
+ Unsafe.Add(ref dest, 1u) = new Vector4(sourceLeft.Y);
+ Unsafe.Add(ref dest, 2u) = new Vector4(sourceLeft.Z);
+ Unsafe.Add(ref dest, 3u) = new Vector4(sourceLeft.W);
+ Unsafe.Add(ref dest, 4u) = new Vector4(sourceRight.X);
+ Unsafe.Add(ref dest, 5u) = new Vector4(sourceRight.Y);
+ Unsafe.Add(ref dest, 6u) = new Vector4(sourceRight.Z);
+ Unsafe.Add(ref dest, 7u) = new Vector4(sourceRight.W);
+ }
+
[MethodImpl(InliningOptions.ColdPath)]
private void CopyArbitraryScale(ref float areaOrigin, uint areaStride, uint horizontalScale, uint verticalScale)
{
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
index 19a695d07d..f3767c8884 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
@@ -132,7 +132,7 @@ internal partial struct Block8x8F : IEquatable
///
/// Destination
[MethodImpl(InliningOptions.ShortMethod)]
- public void ScaledCopyTo(float[] dest)
+ public readonly void ScaledCopyTo(float[] dest)
{
DebugGuard.MustBeGreaterThanOrEqualTo(dest.Length, Size, "dest is too small");