diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs
index 8e0d526e5..ffd405714 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs
@@ -40,6 +40,36 @@ internal partial struct Block8x8F
this.V7R = NormalizeAndRoundVector128(this.V7R.AsVector128(), off, max).AsVector4();
}
+    /// <summary>
+    /// Loads values from <paramref name="source"/> using <see cref="Vector128"/> intrinsics, converting each 16-bit value to a single-precision float.
+    /// </summary>
+    /// <param name="source">The source <see cref="Block8x8"/>.</param>
+    public void LoadFromInt16ExtendedVector128(ref Block8x8 source)
+    {
+        DebugGuard.IsTrue(Vector128.IsHardwareAccelerated, "Vector128 support is required to run this operation!");
+
+        ref Vector128<short> srcBase = ref Unsafe.As<Block8x8, Vector128<short>>(ref source);
+        ref Vector128<float> destBase = ref Unsafe.As<Block8x8F, Vector128<float>>(ref this);
+
+        // Only 8 iterations, one per 128b short block
+        for (nuint i = 0; i < 8; i++)
+        {
+            Vector128<short> src = Unsafe.Add(ref srcBase, i);
+
+            // Step 1: Widen short -> int
+            Vector128<int> lower = Vector128.WidenLower(src); // lower 4 shorts -> 4 ints
+            Vector128<int> upper = Vector128.WidenUpper(src); // upper 4 shorts -> 4 ints
+
+            // Step 2: Convert int -> float
+            Vector128<float> lowerF = Vector128.ConvertToSingle(lower);
+            Vector128<float> upperF = Vector128.ConvertToSingle(upper);
+
+            // Step 3: Store to destination (8 source lanes -> two Vector128<float> blocks)
+            Unsafe.Add(ref destBase, (i * 2) + 0) = lowerF;
+            Unsafe.Add(ref destBase, (i * 2) + 1) = upperF;
+        }
+    }
+
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector128<float> NormalizeAndRoundVector128(Vector128<float> value, Vector128<float> off, Vector128<float> max)
    => Vector128_.RoundToNearestInteger(Vector128_.Clamp(value + off, Vector128<float>.Zero, max));
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
index 284c5bfe5..f7ef44384 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
@@ -392,6 +392,11 @@ internal partial struct Block8x8F : IEquatable
this.LoadFromInt16ExtendedAvx2(ref source);
return;
}
+ else if (Vector128.IsHardwareAccelerated)
+ {
+ this.LoadFromInt16ExtendedVector128(ref source);
+ return;
+ }
this.LoadFromInt16Scalar(ref source);
}
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
index 7b73c0c52..1c5d15dc2 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
@@ -55,7 +55,7 @@ public partial class Block8x8FTests : JpegFixture
Times,
() =>
{
- var block = default(Block8x8F);
+ Block8x8F block = default;
for (int i = 0; i < Block8x8F.Size; i++)
{
@@ -68,7 +68,7 @@ public partial class Block8x8FTests : JpegFixture
sum += block[i];
}
});
- Assert.Equal(sum, 64f * 63f * 0.5f);
+ Assert.Equal(64f * 63f * 0.5f, sum);
}
[Fact]
@@ -93,7 +93,7 @@ public partial class Block8x8FTests : JpegFixture
sum += block[i];
}
});
- Assert.Equal(sum, 64f * 63f * 0.5f);
+ Assert.Equal(64f * 63f * 0.5f, sum);
}
[Fact]
@@ -121,7 +121,7 @@ public partial class Block8x8FTests : JpegFixture
}
[Fact]
- public void TransposeInplace()
+ public void TransposeInPlace()
{
static void RunTest()
{
@@ -276,7 +276,7 @@ public partial class Block8x8FTests : JpegFixture
float[] data = Create8x8RandomFloatData(-1000, 1000);
Block8x8F source = Block8x8F.Load(data);
- var dest = default(Block8x8);
+ Block8x8 dest = default;
source.RoundInto(ref dest);
@@ -388,7 +388,7 @@ public partial class Block8x8FTests : JpegFixture
short[] data = Create8x8ShortData();
- var source = Block8x8.Load(data);
+ Block8x8 source = Block8x8.Load(data);
Block8x8F dest = default;
dest.LoadFromInt16Scalar(ref source);
@@ -399,6 +399,27 @@ public partial class Block8x8FTests : JpegFixture
}
}
+    [Fact]
+    public void LoadFromUInt16ExtendedVector128()
+    {
+        if (this.SkipOnNonVector128Runner())
+        {
+            return;
+        }
+
+        // Build a short block from known data and run it through the Vector128 load path.
+        short[] expected = Create8x8ShortData();
+        Block8x8 input = Block8x8.Load(expected);
+        Block8x8F actual = default;
+        actual.LoadFromInt16ExtendedVector128(ref input);
+
+        // Every element of the float block must equal the short it was loaded from.
+        for (int index = 0; index < Block8x8F.Size; index++)
+        {
+            Assert.Equal(expected[index], actual[index]);
+        }
+    }
+
[Fact]
public void LoadFromUInt16ExtendedAvx2()
{
@@ -409,7 +430,7 @@ public partial class Block8x8FTests : JpegFixture
short[] data = Create8x8ShortData();
- var source = Block8x8.Load(data);
+ Block8x8 source = Block8x8.Load(data);
Block8x8F dest = default;
dest.LoadFromInt16ExtendedAvx2(ref source);