diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs
index 179f9aa287..c2a95989d3 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs
@@ -29,7 +29,50 @@ internal partial struct Block8x8F
             return;
         }
 
-        // TODO: Optimize: implement all cases with scale-specific, loopless code!
+        if (horizontalScale == 2 && verticalScale == 1)
+        {
+            this.CopyTo2x1Scale(ref areaOrigin, (uint)areaStride);
+            return;
+        }
+
+        if (horizontalScale == 1 && verticalScale == 2)
+        {
+            this.CopyTo1x2Scale(ref areaOrigin, (uint)areaStride);
+            return;
+        }
+
+        if (horizontalScale == 4 && verticalScale == 1)
+        {
+            this.CopyTo4x1Scale(ref areaOrigin, (uint)areaStride);
+            return;
+        }
+
+        if (horizontalScale == 4 && verticalScale == 2)
+        {
+            this.CopyTo4x2Scale(ref areaOrigin, (uint)areaStride);
+            return;
+        }
+
+        if (horizontalScale == 1 && verticalScale == 4)
+        {
+            this.CopyTo1x4Scale(ref areaOrigin, (uint)areaStride);
+            return;
+        }
+
+        if (horizontalScale == 2 && verticalScale == 4)
+        {
+            this.CopyTo2x4Scale(ref areaOrigin, (uint)areaStride);
+            return;
+        }
+
+        if (horizontalScale == 4 && verticalScale == 4)
+        {
+            this.CopyTo4x4Scale(ref areaOrigin, (uint)areaStride);
+            return;
+        }
+
+        // The common 1x, 2x, and 4x integral scales are specialized above.
+        // Uncommon legal factor-3 scales use the generic fallback.
         this.CopyArbitraryScale(ref areaOrigin, (uint)areaStride, (uint)horizontalScale, (uint)verticalScale);
     }
 
@@ -85,6 +128,285 @@ internal partial struct Block8x8F
         }
     }
 
+    /// <summary>
+    /// Copies the full 8x8 block into the destination buffer while doubling only the horizontal axis.
+    /// </summary>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private void CopyTo2x1Scale(ref float areaOrigin, uint areaStride)
+    {
+        ref Vector4 sourceBase = ref this.V0L;
+
+        WidenRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 2u, 2u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 3u, 3u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 4u, 4u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 5u, 5u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 6u, 6u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 7u, 7u, areaStride);
+    }
+
+    /// <summary>
+    /// Copies the full 8x8 block into the destination buffer while doubling only the vertical axis.
+    /// </summary>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private void CopyTo1x2Scale(ref float areaOrigin, uint areaStride)
+    {
+        ref Vector4 sourceBase = ref this.V0L;
+
+        CopyRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 2u, 4u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 2u, 5u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 3u, 6u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 3u, 7u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 4u, 8u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 4u, 9u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 5u, 10u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 5u, 11u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 6u, 12u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 6u, 13u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 7u, 14u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 7u, 15u, areaStride);
+    }
+
+    /// <summary>
+    /// Copies the full 8x8 block into the destination buffer while quadrupling only the horizontal axis.
+    /// </summary>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private void CopyTo4x1Scale(ref float areaOrigin, uint areaStride)
+    {
+        ref Vector4 sourceBase = ref this.V0L;
+
+        ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 1u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 2u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 3u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 4u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 5u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 6u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 7u, areaStride);
+    }
+
+    /// <summary>
+    /// Copies the full 8x8 block into the destination buffer while quadrupling horizontally and doubling vertically.
+    /// </summary>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private void CopyTo4x2Scale(ref float areaOrigin, uint areaStride)
+    {
+        ref Vector4 sourceBase = ref this.V0L;
+
+        ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 2u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 3u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 4u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 5u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 6u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 7u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 8u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 9u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 10u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 11u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 12u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 13u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 14u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 15u, areaStride);
+    }
+
+    /// <summary>
+    /// Copies the full 8x8 block into the destination buffer while quadrupling only the vertical axis.
+    /// </summary>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private void CopyTo1x4Scale(ref float areaOrigin, uint areaStride)
+    {
+        ref Vector4 sourceBase = ref this.V0L;
+
+        CopyRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 3u, 14u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 4u, 16u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 4u, 17u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 4u, 18u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 4u, 19u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 5u, 20u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 5u, 21u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 5u, 22u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 5u, 23u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 6u, 24u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 6u, 25u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 6u, 26u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 6u, 27u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 7u, 28u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 7u, 29u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 7u, 30u, areaStride);
+        CopyRow8(ref sourceBase, ref areaOrigin, 7u, 31u, areaStride);
+    }
+
+    /// <summary>
+    /// Copies the full 8x8 block into the destination buffer while doubling horizontally and quadrupling vertically.
+    /// </summary>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private void CopyTo2x4Scale(ref float areaOrigin, uint areaStride)
+    {
+        ref Vector4 sourceBase = ref this.V0L;
+
+        WidenRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 3u, 14u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 4u, 16u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 4u, 17u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 4u, 18u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 4u, 19u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 5u, 20u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 5u, 21u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 5u, 22u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 5u, 23u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 6u, 24u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 6u, 25u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 6u, 26u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 6u, 27u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 7u, 28u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 7u, 29u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 7u, 30u, areaStride);
+        WidenRow8(ref sourceBase, ref areaOrigin, 7u, 31u, areaStride);
+    }
+
+    /// <summary>
+    /// Copies the full 8x8 block into the destination buffer while quadrupling both axes.
+    /// </summary>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private void CopyTo4x4Scale(ref float areaOrigin, uint areaStride)
+    {
+        ref Vector4 sourceBase = ref this.V0L;
+
+        ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 0u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 1u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 2u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 0u, 3u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 4u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 5u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 6u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 1u, 7u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 8u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 9u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 10u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 2u, 11u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 12u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 13u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 14u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 3u, 15u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 16u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 17u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 18u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 4u, 19u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 20u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 21u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 22u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 5u, 23u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 24u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 25u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 26u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 6u, 27u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 28u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 29u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 30u, areaStride);
+        ExpandRow8(ref sourceBase, ref areaOrigin, 7u, 31u, areaStride);
+    }
+
+    /// <summary>
+    /// Copies one eight-sample row from the full block to the destination row.
+    /// </summary>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private static void CopyRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride)
+    {
+        ref Vector4 source = ref Unsafe.Add(ref sourceBase, sourceRow * 2u);
+        ref Vector4 dest = ref Unsafe.As<float, Vector4>(ref Unsafe.Add(ref areaOrigin, destRow * areaStride));
+
+        dest = source;
+        Unsafe.Add(ref dest, 1u) = Unsafe.Add(ref source, 1u);
+    }
+
+    /// <summary>
+    /// Expands one eight-sample row to sixteen samples by duplicating each source value horizontally.
+    /// </summary>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private static void WidenRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride)
+    {
+        ref Vector4 sourceLeft = ref Unsafe.Add(ref sourceBase, sourceRow * 2u);
+        ref Vector4 sourceRight = ref Unsafe.Add(ref sourceLeft, 1u);
+        ref Vector4 dest = ref Unsafe.As<float, Vector4>(ref Unsafe.Add(ref areaOrigin, destRow * areaStride));
+
+        Vector4 xyLeft = new(sourceLeft.X);
+        xyLeft.Z = sourceLeft.Y;
+        xyLeft.W = sourceLeft.Y;
+
+        Vector4 zwLeft = new(sourceLeft.Z);
+        zwLeft.Z = sourceLeft.W;
+        zwLeft.W = sourceLeft.W;
+
+        Vector4 xyRight = new(sourceRight.X);
+        xyRight.Z = sourceRight.Y;
+        xyRight.W = sourceRight.Y;
+
+        Vector4 zwRight = new(sourceRight.Z);
+        zwRight.Z = sourceRight.W;
+        zwRight.W = sourceRight.W;
+
+        dest = xyLeft;
+        Unsafe.Add(ref dest, 1u) = zwLeft;
+        Unsafe.Add(ref dest, 2u) = xyRight;
+        Unsafe.Add(ref dest, 3u) = zwRight;
+    }
+
+    /// <summary>
+    /// Expands one eight-sample row to thirty-two samples by duplicating each source value four times horizontally.
+    /// </summary>
+    [MethodImpl(InliningOptions.ShortMethod)]
+    private static void ExpandRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride)
+    {
+        ref Vector4 sourceLeft = ref Unsafe.Add(ref sourceBase, sourceRow * 2u);
+        ref Vector4 sourceRight = ref Unsafe.Add(ref sourceLeft, 1u);
+        ref Vector4 dest = ref Unsafe.As<float, Vector4>(ref Unsafe.Add(ref areaOrigin, destRow * areaStride));
+
+        dest = new Vector4(sourceLeft.X);
+        Unsafe.Add(ref dest, 1u) = new Vector4(sourceLeft.Y);
+        Unsafe.Add(ref dest, 2u) = new Vector4(sourceLeft.Z);
+        Unsafe.Add(ref dest, 3u) = new Vector4(sourceLeft.W);
+        Unsafe.Add(ref dest, 4u) = new Vector4(sourceRight.X);
+        Unsafe.Add(ref dest, 5u) = new Vector4(sourceRight.Y);
+        Unsafe.Add(ref dest, 6u) = new Vector4(sourceRight.Z);
+        Unsafe.Add(ref dest, 7u) = new Vector4(sourceRight.W);
+    }
+
     [MethodImpl(InliningOptions.ColdPath)]
     private void CopyArbitraryScale(ref float areaOrigin, uint areaStride, uint horizontalScale, uint verticalScale)
     {
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
index 19a695d07d..f3767c8884 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
@@ -132,7 +132,7 @@ internal partial struct Block8x8F : IEquatable<Block8x8F>
     /// </summary>
     /// <param name="dest">Destination</param>
     [MethodImpl(InliningOptions.ShortMethod)]
-    public void ScaledCopyTo(float[] dest)
+    public readonly void ScaledCopyTo(float[] dest)
     {
         DebugGuard.MustBeGreaterThanOrEqualTo(dest.Length, Size, "dest is too small");
 