Browse Source

Optimize Block8x8F ScaledCopy for common scales

pull/3115/head
James Jackson-South 3 weeks ago
parent
commit
ac0adfccac
  1. 324
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs
  2. 2
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

324
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopy.cs

@ -29,7 +29,50 @@ internal partial struct Block8x8F
return;
}
// TODO: Optimize: implement all cases with scale-specific, loopless code!
if (horizontalScale == 2 && verticalScale == 1)
{
this.CopyTo2x1Scale(ref areaOrigin, (uint)areaStride);
return;
}
if (horizontalScale == 1 && verticalScale == 2)
{
this.CopyTo1x2Scale(ref areaOrigin, (uint)areaStride);
return;
}
if (horizontalScale == 4 && verticalScale == 1)
{
this.CopyTo4x1Scale(ref areaOrigin, (uint)areaStride);
return;
}
if (horizontalScale == 4 && verticalScale == 2)
{
this.CopyTo4x2Scale(ref areaOrigin, (uint)areaStride);
return;
}
if (horizontalScale == 1 && verticalScale == 4)
{
this.CopyTo1x4Scale(ref areaOrigin, (uint)areaStride);
return;
}
if (horizontalScale == 2 && verticalScale == 4)
{
this.CopyTo2x4Scale(ref areaOrigin, (uint)areaStride);
return;
}
if (horizontalScale == 4 && verticalScale == 4)
{
this.CopyTo4x4Scale(ref areaOrigin, (uint)areaStride);
return;
}
// The common 1x, 2x, and 4x integral scales are specialized above.
// Uncommon legal factor-3 scales use the generic fallback.
this.CopyArbitraryScale(ref areaOrigin, (uint)areaStride, (uint)horizontalScale, (uint)verticalScale);
}
@ -85,6 +128,285 @@ internal partial struct Block8x8F
}
}
/// <summary>
/// Writes the whole 8x8 block into the destination area at 2x horizontal and 1x vertical scale,
/// so every source row becomes one destination row of sixteen samples.
/// </summary>
/// <param name="areaOrigin">Reference to the first sample of the destination area.</param>
/// <param name="areaStride">Distance between consecutive destination rows, measured in float elements.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void CopyTo2x1Scale(ref float areaOrigin, uint areaStride)
{
    ref Vector4 block = ref this.V0L;

    // Source row r lands on destination row r. The calls are spelled out one by one rather than looped.
    Widen(ref block, ref areaOrigin, 0u, areaStride);
    Widen(ref block, ref areaOrigin, 1u, areaStride);
    Widen(ref block, ref areaOrigin, 2u, areaStride);
    Widen(ref block, ref areaOrigin, 3u, areaStride);
    Widen(ref block, ref areaOrigin, 4u, areaStride);
    Widen(ref block, ref areaOrigin, 5u, areaStride);
    Widen(ref block, ref areaOrigin, 6u, areaStride);
    Widen(ref block, ref areaOrigin, 7u, areaStride);

    // Widens one source row into the destination row with the same index.
    [MethodImpl(InliningOptions.ShortMethod)]
    static void Widen(ref Vector4 src, ref float dst, nuint row, uint stride)
        => WidenRow8(ref src, ref dst, row, row, stride);
}
/// <summary>
/// Writes the whole 8x8 block into the destination area at 1x horizontal and 2x vertical scale,
/// so every source row is repeated on two consecutive destination rows (sixteen rows in total).
/// </summary>
/// <param name="areaOrigin">Reference to the first sample of the destination area.</param>
/// <param name="areaStride">Distance between consecutive destination rows, measured in float elements.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void CopyTo1x2Scale(ref float areaOrigin, uint areaStride)
{
    ref Vector4 block = ref this.V0L;

    // Source row r lands on destination rows 2r and 2r + 1. The calls are spelled out rather than looped.
    CopyTwice(ref block, ref areaOrigin, 0u, areaStride);
    CopyTwice(ref block, ref areaOrigin, 1u, areaStride);
    CopyTwice(ref block, ref areaOrigin, 2u, areaStride);
    CopyTwice(ref block, ref areaOrigin, 3u, areaStride);
    CopyTwice(ref block, ref areaOrigin, 4u, areaStride);
    CopyTwice(ref block, ref areaOrigin, 5u, areaStride);
    CopyTwice(ref block, ref areaOrigin, 6u, areaStride);
    CopyTwice(ref block, ref areaOrigin, 7u, areaStride);

    // Copies one source row onto its pair of destination rows, upper row first.
    [MethodImpl(InliningOptions.ShortMethod)]
    static void CopyTwice(ref Vector4 src, ref float dst, nuint row, uint stride)
    {
        nuint first = row * 2u;
        CopyRow8(ref src, ref dst, row, first, stride);
        CopyRow8(ref src, ref dst, row, first + 1u, stride);
    }
}
/// <summary>
/// Writes the whole 8x8 block into the destination area at 4x horizontal and 1x vertical scale,
/// so every source row becomes one destination row of thirty-two samples.
/// </summary>
/// <param name="areaOrigin">Reference to the first sample of the destination area.</param>
/// <param name="areaStride">Distance between consecutive destination rows, measured in float elements.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void CopyTo4x1Scale(ref float areaOrigin, uint areaStride)
{
    ref Vector4 block = ref this.V0L;

    // Source row r lands on destination row r. The calls are spelled out one by one rather than looped.
    Expand(ref block, ref areaOrigin, 0u, areaStride);
    Expand(ref block, ref areaOrigin, 1u, areaStride);
    Expand(ref block, ref areaOrigin, 2u, areaStride);
    Expand(ref block, ref areaOrigin, 3u, areaStride);
    Expand(ref block, ref areaOrigin, 4u, areaStride);
    Expand(ref block, ref areaOrigin, 5u, areaStride);
    Expand(ref block, ref areaOrigin, 6u, areaStride);
    Expand(ref block, ref areaOrigin, 7u, areaStride);

    // Expands one source row into the destination row with the same index.
    [MethodImpl(InliningOptions.ShortMethod)]
    static void Expand(ref Vector4 src, ref float dst, nuint row, uint stride)
        => ExpandRow8(ref src, ref dst, row, row, stride);
}
/// <summary>
/// Writes the whole 8x8 block into the destination area at 4x horizontal and 2x vertical scale:
/// sixteen destination rows of thirty-two samples each.
/// </summary>
/// <param name="areaOrigin">Reference to the first sample of the destination area.</param>
/// <param name="areaStride">Distance between consecutive destination rows, measured in float elements.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void CopyTo4x2Scale(ref float areaOrigin, uint areaStride)
{
    ref Vector4 block = ref this.V0L;

    // Source row r lands on destination rows 2r and 2r + 1. The calls are spelled out rather than looped.
    ExpandTwice(ref block, ref areaOrigin, 0u, areaStride);
    ExpandTwice(ref block, ref areaOrigin, 1u, areaStride);
    ExpandTwice(ref block, ref areaOrigin, 2u, areaStride);
    ExpandTwice(ref block, ref areaOrigin, 3u, areaStride);
    ExpandTwice(ref block, ref areaOrigin, 4u, areaStride);
    ExpandTwice(ref block, ref areaOrigin, 5u, areaStride);
    ExpandTwice(ref block, ref areaOrigin, 6u, areaStride);
    ExpandTwice(ref block, ref areaOrigin, 7u, areaStride);

    // Expands one source row onto its pair of destination rows, upper row first.
    [MethodImpl(InliningOptions.ShortMethod)]
    static void ExpandTwice(ref Vector4 src, ref float dst, nuint row, uint stride)
    {
        nuint first = row * 2u;
        ExpandRow8(ref src, ref dst, row, first, stride);
        ExpandRow8(ref src, ref dst, row, first + 1u, stride);
    }
}
/// <summary>
/// Writes the whole 8x8 block into the destination area at 1x horizontal and 4x vertical scale,
/// so every source row is repeated on four consecutive destination rows (thirty-two rows in total).
/// </summary>
/// <param name="areaOrigin">Reference to the first sample of the destination area.</param>
/// <param name="areaStride">Distance between consecutive destination rows, measured in float elements.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void CopyTo1x4Scale(ref float areaOrigin, uint areaStride)
{
    ref Vector4 block = ref this.V0L;

    // Source row r lands on destination rows 4r .. 4r + 3. The calls are spelled out rather than looped.
    CopyFourTimes(ref block, ref areaOrigin, 0u, areaStride);
    CopyFourTimes(ref block, ref areaOrigin, 1u, areaStride);
    CopyFourTimes(ref block, ref areaOrigin, 2u, areaStride);
    CopyFourTimes(ref block, ref areaOrigin, 3u, areaStride);
    CopyFourTimes(ref block, ref areaOrigin, 4u, areaStride);
    CopyFourTimes(ref block, ref areaOrigin, 5u, areaStride);
    CopyFourTimes(ref block, ref areaOrigin, 6u, areaStride);
    CopyFourTimes(ref block, ref areaOrigin, 7u, areaStride);

    // Copies one source row onto its four destination rows, top to bottom.
    [MethodImpl(InliningOptions.ShortMethod)]
    static void CopyFourTimes(ref Vector4 src, ref float dst, nuint row, uint stride)
    {
        nuint first = row * 4u;
        CopyRow8(ref src, ref dst, row, first, stride);
        CopyRow8(ref src, ref dst, row, first + 1u, stride);
        CopyRow8(ref src, ref dst, row, first + 2u, stride);
        CopyRow8(ref src, ref dst, row, first + 3u, stride);
    }
}
/// <summary>
/// Writes the whole 8x8 block into the destination area at 2x horizontal and 4x vertical scale:
/// thirty-two destination rows of sixteen samples each.
/// </summary>
/// <param name="areaOrigin">Reference to the first sample of the destination area.</param>
/// <param name="areaStride">Distance between consecutive destination rows, measured in float elements.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void CopyTo2x4Scale(ref float areaOrigin, uint areaStride)
{
    ref Vector4 block = ref this.V0L;

    // Source row r lands on destination rows 4r .. 4r + 3. The calls are spelled out rather than looped.
    WidenFourTimes(ref block, ref areaOrigin, 0u, areaStride);
    WidenFourTimes(ref block, ref areaOrigin, 1u, areaStride);
    WidenFourTimes(ref block, ref areaOrigin, 2u, areaStride);
    WidenFourTimes(ref block, ref areaOrigin, 3u, areaStride);
    WidenFourTimes(ref block, ref areaOrigin, 4u, areaStride);
    WidenFourTimes(ref block, ref areaOrigin, 5u, areaStride);
    WidenFourTimes(ref block, ref areaOrigin, 6u, areaStride);
    WidenFourTimes(ref block, ref areaOrigin, 7u, areaStride);

    // Widens one source row onto its four destination rows, top to bottom.
    [MethodImpl(InliningOptions.ShortMethod)]
    static void WidenFourTimes(ref Vector4 src, ref float dst, nuint row, uint stride)
    {
        nuint first = row * 4u;
        WidenRow8(ref src, ref dst, row, first, stride);
        WidenRow8(ref src, ref dst, row, first + 1u, stride);
        WidenRow8(ref src, ref dst, row, first + 2u, stride);
        WidenRow8(ref src, ref dst, row, first + 3u, stride);
    }
}
/// <summary>
/// Writes the whole 8x8 block into the destination area at 4x scale on both axes:
/// thirty-two destination rows of thirty-two samples each.
/// </summary>
/// <param name="areaOrigin">Reference to the first sample of the destination area.</param>
/// <param name="areaStride">Distance between consecutive destination rows, measured in float elements.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void CopyTo4x4Scale(ref float areaOrigin, uint areaStride)
{
    ref Vector4 block = ref this.V0L;

    // Source row r lands on destination rows 4r .. 4r + 3. The calls are spelled out rather than looped.
    ExpandFourTimes(ref block, ref areaOrigin, 0u, areaStride);
    ExpandFourTimes(ref block, ref areaOrigin, 1u, areaStride);
    ExpandFourTimes(ref block, ref areaOrigin, 2u, areaStride);
    ExpandFourTimes(ref block, ref areaOrigin, 3u, areaStride);
    ExpandFourTimes(ref block, ref areaOrigin, 4u, areaStride);
    ExpandFourTimes(ref block, ref areaOrigin, 5u, areaStride);
    ExpandFourTimes(ref block, ref areaOrigin, 6u, areaStride);
    ExpandFourTimes(ref block, ref areaOrigin, 7u, areaStride);

    // Expands one source row onto its four destination rows, top to bottom.
    [MethodImpl(InliningOptions.ShortMethod)]
    static void ExpandFourTimes(ref Vector4 src, ref float dst, nuint row, uint stride)
    {
        nuint first = row * 4u;
        ExpandRow8(ref src, ref dst, row, first, stride);
        ExpandRow8(ref src, ref dst, row, first + 1u, stride);
        ExpandRow8(ref src, ref dst, row, first + 2u, stride);
        ExpandRow8(ref src, ref dst, row, first + 3u, stride);
    }
}
/// <summary>
/// Copies a single row of eight samples from the block to one destination row without scaling.
/// </summary>
/// <param name="sourceBase">Reference to the vector that row 0 of the block starts at.</param>
/// <param name="areaOrigin">Reference to the first sample of the destination area.</param>
/// <param name="sourceRow">Index of the block row to read.</param>
/// <param name="destRow">Index of the destination row to write.</param>
/// <param name="areaStride">Distance between consecutive destination rows, measured in float elements.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void CopyRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride)
{
    // A block row spans two consecutive Vector4 values, hence the factor of two.
    ref Vector4 rowLeft = ref Unsafe.Add(ref sourceBase, sourceRow * 2u);

    // Locate the destination row in float units, then view it as vectors.
    ref float rowStart = ref Unsafe.Add(ref areaOrigin, destRow * areaStride);
    ref Vector4 target = ref Unsafe.As<float, Vector4>(ref rowStart);

    target = rowLeft;
    Unsafe.Add(ref target, 1u) = Unsafe.Add(ref rowLeft, 1u);
}
/// <summary>
/// Writes one block row of eight samples as sixteen destination samples, emitting every source value twice in a row.
/// </summary>
/// <param name="sourceBase">Reference to the vector that row 0 of the block starts at.</param>
/// <param name="areaOrigin">Reference to the first sample of the destination area.</param>
/// <param name="sourceRow">Index of the block row to read.</param>
/// <param name="destRow">Index of the destination row to write.</param>
/// <param name="areaStride">Distance between consecutive destination rows, measured in float elements.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void WidenRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride)
{
    // A block row spans two consecutive Vector4 values, hence the factor of two.
    ref Vector4 left = ref Unsafe.Add(ref sourceBase, sourceRow * 2u);
    ref Vector4 right = ref Unsafe.Add(ref left, 1u);

    ref float rowStart = ref Unsafe.Add(ref areaOrigin, destRow * areaStride);
    ref Vector4 target = ref Unsafe.As<float, Vector4>(ref rowStart);

    // Build all four output vectors before storing anything; each holds two source lanes, doubled.
    Vector4 leftLow = new(left.X, left.X, left.Y, left.Y);
    Vector4 leftHigh = new(left.Z, left.Z, left.W, left.W);
    Vector4 rightLow = new(right.X, right.X, right.Y, right.Y);
    Vector4 rightHigh = new(right.Z, right.Z, right.W, right.W);

    target = leftLow;
    Unsafe.Add(ref target, 1u) = leftHigh;
    Unsafe.Add(ref target, 2u) = rightLow;
    Unsafe.Add(ref target, 3u) = rightHigh;
}
/// <summary>
/// Writes one block row of eight samples as thirty-two destination samples, emitting every source value four times in a row.
/// </summary>
/// <param name="sourceBase">Reference to the vector that row 0 of the block starts at.</param>
/// <param name="areaOrigin">Reference to the first sample of the destination area.</param>
/// <param name="sourceRow">Index of the block row to read.</param>
/// <param name="destRow">Index of the destination row to write.</param>
/// <param name="areaStride">Distance between consecutive destination rows, measured in float elements.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void ExpandRow8(ref Vector4 sourceBase, ref float areaOrigin, nuint sourceRow, nuint destRow, uint areaStride)
{
    // A block row spans two consecutive Vector4 values, hence the factor of two.
    ref Vector4 left = ref Unsafe.Add(ref sourceBase, sourceRow * 2u);
    ref Vector4 right = ref Unsafe.Add(ref left, 1u);

    ref float rowStart = ref Unsafe.Add(ref areaOrigin, destRow * areaStride);
    ref Vector4 target = ref Unsafe.As<float, Vector4>(ref rowStart);

    // One broadcast vector per source sample: four from the left half, then four from the right half.
    target = new Vector4(left.X);
    Unsafe.Add(ref target, 1u) = new Vector4(left.Y);
    Unsafe.Add(ref target, 2u) = new Vector4(left.Z);
    Unsafe.Add(ref target, 3u) = new Vector4(left.W);
    Unsafe.Add(ref target, 4u) = new Vector4(right.X);
    Unsafe.Add(ref target, 5u) = new Vector4(right.Y);
    Unsafe.Add(ref target, 6u) = new Vector4(right.Z);
    Unsafe.Add(ref target, 7u) = new Vector4(right.W);
}
[MethodImpl(InliningOptions.ColdPath)]
private void CopyArbitraryScale(ref float areaOrigin, uint areaStride, uint horizontalScale, uint verticalScale)
{

2
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

@ -132,7 +132,7 @@ internal partial struct Block8x8F : IEquatable<Block8x8F>
/// </summary>
/// <param name="dest">Destination</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void ScaledCopyTo(float[] dest)
public readonly void ScaledCopyTo(float[] dest)
{
DebugGuard.MustBeGreaterThanOrEqualTo(dest.Length, Size, "dest is too small");

Loading…
Cancel
Save