diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs index 8e0d526e5..ffd405714 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs @@ -40,6 +40,36 @@ internal partial struct Block8x8F this.V7R = NormalizeAndRoundVector128(this.V7R.AsVector128(), off, max).AsVector4(); } + /// + /// Loads values from the source block, widening each short to a float with Vector128 intrinsics. + /// + /// The source block of shorts to convert + public void LoadFromInt16ExtendedVector128(ref Block8x8 source) + { + DebugGuard.IsTrue(Vector128.IsHardwareAccelerated, "Vector128 support is required to run this operation!"); + + ref Vector128 srcBase = ref Unsafe.As>(ref source); + ref Vector128 destBase = ref Unsafe.As>(ref this); + + // Only 8 iterations, one per 128-bit vector of shorts + for (nuint i = 0; i < 8; i++) + { + Vector128 src = Unsafe.Add(ref srcBase, i); + + // Step 1: Widen short -> int + Vector128 lower = Vector128.WidenLower(src); // lower 4 shorts -> 4 ints + Vector128 upper = Vector128.WidenUpper(src); // upper 4 shorts -> 4 ints + + // Step 2: Convert int -> float + Vector128 lowerF = Vector128.ConvertToSingle(lower); + Vector128 upperF = Vector128.ConvertToSingle(upper); + + // Step 3: Store to destination (8 source lanes -> two Vector128 blocks of 4 floats each) + Unsafe.Add(ref destBase, (i * 2) + 0) = lowerF; + Unsafe.Add(ref destBase, (i * 2) + 1) = upperF; + } + } + [MethodImpl(InliningOptions.ShortMethod)] private static Vector128 NormalizeAndRoundVector128(Vector128 value, Vector128 off, Vector128 max) => Vector128_.RoundToNearestInteger(Vector128_.Clamp(value + off, Vector128.Zero, max)); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 284c5bfe5..f7ef44384 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -392,6 +392,11 @@ 
internal partial struct Block8x8F : IEquatable this.LoadFromInt16ExtendedAvx2(ref source); return; } + else if (Vector128.IsHardwareAccelerated) + { + this.LoadFromInt16ExtendedVector128(ref source); + return; + } this.LoadFromInt16Scalar(ref source); } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 7b73c0c52..1c5d15dc2 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -55,7 +55,7 @@ public partial class Block8x8FTests : JpegFixture Times, () => { - var block = default(Block8x8F); + Block8x8F block = default; for (int i = 0; i < Block8x8F.Size; i++) { @@ -68,7 +68,7 @@ public partial class Block8x8FTests : JpegFixture sum += block[i]; } }); - Assert.Equal(sum, 64f * 63f * 0.5f); + Assert.Equal(64f * 63f * 0.5f, sum); } [Fact] @@ -93,7 +93,7 @@ public partial class Block8x8FTests : JpegFixture sum += block[i]; } }); - Assert.Equal(sum, 64f * 63f * 0.5f); + Assert.Equal(64f * 63f * 0.5f, sum); } [Fact] @@ -121,7 +121,7 @@ public partial class Block8x8FTests : JpegFixture } [Fact] - public void TransposeInplace() + public void TransposeInPlace() { static void RunTest() { @@ -276,7 +276,7 @@ public partial class Block8x8FTests : JpegFixture float[] data = Create8x8RandomFloatData(-1000, 1000); Block8x8F source = Block8x8F.Load(data); - var dest = default(Block8x8); + Block8x8 dest = default; source.RoundInto(ref dest); @@ -388,7 +388,7 @@ public partial class Block8x8FTests : JpegFixture short[] data = Create8x8ShortData(); - var source = Block8x8.Load(data); + Block8x8 source = Block8x8.Load(data); Block8x8F dest = default; dest.LoadFromInt16Scalar(ref source); @@ -399,6 +399,27 @@ public partial class Block8x8FTests : JpegFixture } } + [Fact] /* Checks that LoadFromInt16ExtendedVector128 copies every element of a Block8x8 into a Block8x8F unchanged. */ + public void LoadFromUInt16ExtendedVector128() /* NOTE(review): name says UInt16, but the data is short[] and the method under test is LoadFromInt16...; the name mirrors the sibling Avx2 test. */ + { + if (this.SkipOnNonVector128Runner()) /* presumably true when the runner lacks Vector128 acceleration; helper is not visible here, confirm */ + { + return; + } + + short[] data = Create8x8ShortData(); + + Block8x8 source = 
Block8x8.Load(data); + + Block8x8F dest = default; + dest.LoadFromInt16ExtendedVector128(ref source); + + for (int i = 0; i < Block8x8F.Size; i++) + { + Assert.Equal(data[i], dest[i]); /* each converted float must equal the original short at the same index */ + } + } + [Fact] public void LoadFromUInt16ExtendedAvx2() { @@ -409,7 +430,7 @@ public partial class Block8x8FTests : JpegFixture short[] data = Create8x8ShortData(); - var source = Block8x8.Load(data); + Block8x8 source = Block8x8.Load(data); Block8x8F dest = default; dest.LoadFromInt16ExtendedAvx2(ref source);