diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 0bd20b441..02f5a1324 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -632,22 +632,55 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components [MethodImpl(InliningOptions.ShortMethod)] private void TransposeInplace_Scalar() { - float tmp; - int horIndex, verIndex; - - // We don't care about the last row as it consists of a single element - // Which won't be swapped with anything - for (int i = 0; i < 7; i++) + ref float elemRef = ref Unsafe.As(ref this); + + // row #0 + Swap(ref Unsafe.Add(ref elemRef, 1), ref Unsafe.Add(ref elemRef, 8)); + Swap(ref Unsafe.Add(ref elemRef, 2), ref Unsafe.Add(ref elemRef, 16)); + Swap(ref Unsafe.Add(ref elemRef, 3), ref Unsafe.Add(ref elemRef, 24)); + Swap(ref Unsafe.Add(ref elemRef, 4), ref Unsafe.Add(ref elemRef, 32)); + Swap(ref Unsafe.Add(ref elemRef, 5), ref Unsafe.Add(ref elemRef, 40)); + Swap(ref Unsafe.Add(ref elemRef, 6), ref Unsafe.Add(ref elemRef, 48)); + Swap(ref Unsafe.Add(ref elemRef, 7), ref Unsafe.Add(ref elemRef, 56)); + + // row #1 + Swap(ref Unsafe.Add(ref elemRef, 10), ref Unsafe.Add(ref elemRef, 17)); + Swap(ref Unsafe.Add(ref elemRef, 11), ref Unsafe.Add(ref elemRef, 25)); + Swap(ref Unsafe.Add(ref elemRef, 12), ref Unsafe.Add(ref elemRef, 33)); + Swap(ref Unsafe.Add(ref elemRef, 13), ref Unsafe.Add(ref elemRef, 41)); + Swap(ref Unsafe.Add(ref elemRef, 14), ref Unsafe.Add(ref elemRef, 49)); + Swap(ref Unsafe.Add(ref elemRef, 15), ref Unsafe.Add(ref elemRef, 57)); + + // row #2 + Swap(ref Unsafe.Add(ref elemRef, 19), ref Unsafe.Add(ref elemRef, 26)); + Swap(ref Unsafe.Add(ref elemRef, 20), ref Unsafe.Add(ref elemRef, 34)); + Swap(ref Unsafe.Add(ref elemRef, 21), ref Unsafe.Add(ref elemRef, 42)); + Swap(ref Unsafe.Add(ref elemRef, 22), ref Unsafe.Add(ref elemRef, 50)); + Swap(ref Unsafe.Add(ref elemRef, 23), ref Unsafe.Add(ref elemRef, 58)); + + // row #3 + Swap(ref Unsafe.Add(ref elemRef, 28), ref Unsafe.Add(ref elemRef, 35)); + Swap(ref Unsafe.Add(ref elemRef, 29), ref Unsafe.Add(ref elemRef, 43)); + Swap(ref Unsafe.Add(ref elemRef, 30), ref Unsafe.Add(ref elemRef, 51)); + Swap(ref Unsafe.Add(ref elemRef, 31), ref Unsafe.Add(ref elemRef, 59)); + + // row #4 + Swap(ref Unsafe.Add(ref elemRef, 37), ref Unsafe.Add(ref elemRef, 44)); + Swap(ref Unsafe.Add(ref elemRef, 38), ref Unsafe.Add(ref elemRef, 52)); + Swap(ref Unsafe.Add(ref elemRef, 39), ref Unsafe.Add(ref elemRef, 60)); + + // row #5 + Swap(ref Unsafe.Add(ref elemRef, 46), ref Unsafe.Add(ref elemRef, 53)); + Swap(ref Unsafe.Add(ref elemRef, 47), ref Unsafe.Add(ref elemRef, 61)); + + // row #6 + Swap(ref Unsafe.Add(ref elemRef, 55), ref Unsafe.Add(ref elemRef, 62)); + + static void Swap(ref float a, ref float b) { - // We don't care about the first element in each row as it's not swapped - for (int j = i + 1; j < 8; j++) - { - horIndex = (i * 8) + j; - verIndex = (j * 8) + i; - tmp = this[horIndex]; - this[horIndex] = this[verIndex]; - this[verIndex] = tmp; - } + float tmp = a; + a = b; + b = tmp; } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs index f60121d33..c2efb517a 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs @@ -35,15 +35,18 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations } /* -BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042.1165 (20H2/October2020Update) +BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042.1237 (20H2/October2020Update) Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores .NET SDK=6.0.100-preview.3.21202.5 - [Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT - AVX : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT - No HwIntrinsics : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT - -| Method | Job | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | -|-------------- |---------------- |----------:|----------:|----------:|------:|------:|------:|------:|----------:| -| TransposeInto | No HwIntrinsics | 19.658 ns | 0.0550 ns | 0.0515 ns | 1.00 | - | - | - | - | -| TransposeInto | AVX | 8.613 ns | 0.0249 ns | 0.0208 ns | 0.44 | - | - | - | - | + [Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT + 1. No HwIntrinsics : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT + 2. SSE : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT + 3. AVX : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT + +Runtime=.NET Core 3.1 + +| Method | Job | Mean | Error | StdDev | Ratio | +|----------------- |----------------:|----------:|----------:|----------:|------:| +| TransposeInplace | No HwIntrinsics | 12.531 ns | 0.0637 ns | 0.0565 ns | 1.00 | +| TransposeInplace | AVX | 5.767 ns | 0.0529 ns | 0.0495 ns | 0.46 | */