Browse Source

Improved scalar transpose implementation

pull/1761/head
Dmitry Pentin 4 years ago
parent
commit
e4b32dbf28
  1. 63
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
  2. 21
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs

63
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

@ -632,22 +632,55 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(InliningOptions.ShortMethod)]
private void TransposeInplace_Scalar()
{
float tmp;
int horIndex, verIndex;
// We don't care about the last row as it consists of a single element
// Which won't be swapped with anything
for (int i = 0; i < 7; i++)
ref float elemRef = ref Unsafe.As<Block8x8F, float>(ref this);
// row #0
Swap(ref Unsafe.Add(ref elemRef, 1), ref Unsafe.Add(ref elemRef, 8));
Swap(ref Unsafe.Add(ref elemRef, 2), ref Unsafe.Add(ref elemRef, 16));
Swap(ref Unsafe.Add(ref elemRef, 3), ref Unsafe.Add(ref elemRef, 24));
Swap(ref Unsafe.Add(ref elemRef, 4), ref Unsafe.Add(ref elemRef, 32));
Swap(ref Unsafe.Add(ref elemRef, 5), ref Unsafe.Add(ref elemRef, 40));
Swap(ref Unsafe.Add(ref elemRef, 6), ref Unsafe.Add(ref elemRef, 48));
Swap(ref Unsafe.Add(ref elemRef, 7), ref Unsafe.Add(ref elemRef, 56));
// row #1
Swap(ref Unsafe.Add(ref elemRef, 10), ref Unsafe.Add(ref elemRef, 17));
Swap(ref Unsafe.Add(ref elemRef, 11), ref Unsafe.Add(ref elemRef, 25));
Swap(ref Unsafe.Add(ref elemRef, 12), ref Unsafe.Add(ref elemRef, 33));
Swap(ref Unsafe.Add(ref elemRef, 13), ref Unsafe.Add(ref elemRef, 41));
Swap(ref Unsafe.Add(ref elemRef, 14), ref Unsafe.Add(ref elemRef, 49));
Swap(ref Unsafe.Add(ref elemRef, 15), ref Unsafe.Add(ref elemRef, 57));
// row #2
Swap(ref Unsafe.Add(ref elemRef, 19), ref Unsafe.Add(ref elemRef, 26));
Swap(ref Unsafe.Add(ref elemRef, 20), ref Unsafe.Add(ref elemRef, 34));
Swap(ref Unsafe.Add(ref elemRef, 21), ref Unsafe.Add(ref elemRef, 42));
Swap(ref Unsafe.Add(ref elemRef, 22), ref Unsafe.Add(ref elemRef, 50));
Swap(ref Unsafe.Add(ref elemRef, 23), ref Unsafe.Add(ref elemRef, 58));
// row #3
Swap(ref Unsafe.Add(ref elemRef, 28), ref Unsafe.Add(ref elemRef, 35));
Swap(ref Unsafe.Add(ref elemRef, 29), ref Unsafe.Add(ref elemRef, 43));
Swap(ref Unsafe.Add(ref elemRef, 30), ref Unsafe.Add(ref elemRef, 51));
Swap(ref Unsafe.Add(ref elemRef, 31), ref Unsafe.Add(ref elemRef, 59));
// row #4
Swap(ref Unsafe.Add(ref elemRef, 37), ref Unsafe.Add(ref elemRef, 44));
Swap(ref Unsafe.Add(ref elemRef, 38), ref Unsafe.Add(ref elemRef, 52));
Swap(ref Unsafe.Add(ref elemRef, 39), ref Unsafe.Add(ref elemRef, 60));
// row #5
Swap(ref Unsafe.Add(ref elemRef, 46), ref Unsafe.Add(ref elemRef, 53));
Swap(ref Unsafe.Add(ref elemRef, 47), ref Unsafe.Add(ref elemRef, 61));
// row #6
Swap(ref Unsafe.Add(ref elemRef, 55), ref Unsafe.Add(ref elemRef, 62));
static void Swap(ref float a, ref float b)
{
// We don't care about the first element in each row as it's not swapped
for (int j = i + 1; j < 8; j++)
{
horIndex = (i * 8) + j;
verIndex = (j * 8) + i;
tmp = this[horIndex];
this[horIndex] = this[verIndex];
this[verIndex] = tmp;
}
float tmp = a;
a = b;
b = tmp;
}
}

21
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs

@ -35,15 +35,18 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
}
/*
BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042.1165 (20H2/October2020Update)
BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042.1237 (20H2/October2020Update)
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET SDK=6.0.100-preview.3.21202.5
[Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
AVX : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
No HwIntrinsics : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
| Method | Job | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
|-------------- |---------------- |----------:|----------:|----------:|------:|------:|------:|------:|----------:|
| TransposeInto | No HwIntrinsics | 19.658 ns | 0.0550 ns | 0.0515 ns | 1.00 | - | - | - | - |
| TransposeInto | AVX | 8.613 ns | 0.0249 ns | 0.0208 ns | 0.44 | - | - | - | - |
[Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
1. No HwIntrinsics : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
2. SSE : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
3. AVX : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
Runtime=.NET Core 3.1
| Method | Job | Mean | Error | StdDev | Ratio |
|----------------- |----------------:|----------:|----------:|----------:|------:|
| TransposeInplace | No HwIntrinsics | 12.531 ns | 0.0637 ns | 0.0565 ns | 1.00 |
| TransposeInplace | AVX | 5.767 ns | 0.0529 ns | 0.0495 ns | 0.46 |
*/

Loading…
Cancel
Save