📷 A modern, cross-platform, 2D Graphics library for .NET
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

498 lines
18 KiB

// Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
// ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
{
public unsafe class Block8x8F_Round
{
private Block8x8F block;
private readonly byte[] blockBuffer = new byte[512];
private GCHandle blockHandle;
private float* alignedPtr;
[GlobalSetup]
public void Setup()
{
if (Vector<float>.Count != 8)
{
throw new NotSupportedException("Vector<float>.Count != 8");
}
this.blockHandle = GCHandle.Alloc(this.blockBuffer, GCHandleType.Pinned);
ulong ptr = (ulong)this.blockHandle.AddrOfPinnedObject();
ptr += 16;
ptr -= ptr % 16;
if (ptr % 16 != 0)
{
throw new Exception("ptr is unaligned");
}
this.alignedPtr = (float*)ptr;
}
[GlobalCleanup]
public void Cleanup()
{
this.blockHandle.Free();
this.alignedPtr = null;
}
[Benchmark]
public void ScalarRound()
{
ref float b = ref Unsafe.As<Block8x8F, float>(ref this.block);
for (int i = 0; i < Block8x8F.Size; i++)
{
ref float v = ref Unsafe.Add(ref b, i);
v = (float)Math.Round(v);
}
}
[Benchmark(Baseline = true)]
public void SimdUtils_FastRound_Vector8()
{
ref Block8x8F b = ref this.block;
ref Vector<float> row0 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V0L);
row0 = SimdUtils.FastRound(row0);
ref Vector<float> row1 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V1L);
row1 = SimdUtils.FastRound(row1);
ref Vector<float> row2 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V2L);
row2 = SimdUtils.FastRound(row2);
ref Vector<float> row3 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V3L);
row3 = SimdUtils.FastRound(row3);
ref Vector<float> row4 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V4L);
row4 = SimdUtils.FastRound(row4);
ref Vector<float> row5 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V5L);
row5 = SimdUtils.FastRound(row5);
ref Vector<float> row6 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V6L);
row6 = SimdUtils.FastRound(row6);
ref Vector<float> row7 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V7L);
row7 = SimdUtils.FastRound(row7);
}
[Benchmark]
public void SimdUtils_FastRound_Vector8_ForceAligned()
{
ref Block8x8F b = ref Unsafe.AsRef<Block8x8F>(this.alignedPtr);
ref Vector<float> row0 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V0L);
row0 = SimdUtils.FastRound(row0);
ref Vector<float> row1 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V1L);
row1 = SimdUtils.FastRound(row1);
ref Vector<float> row2 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V2L);
row2 = SimdUtils.FastRound(row2);
ref Vector<float> row3 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V3L);
row3 = SimdUtils.FastRound(row3);
ref Vector<float> row4 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V4L);
row4 = SimdUtils.FastRound(row4);
ref Vector<float> row5 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V5L);
row5 = SimdUtils.FastRound(row5);
ref Vector<float> row6 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V6L);
row6 = SimdUtils.FastRound(row6);
ref Vector<float> row7 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V7L);
row7 = SimdUtils.FastRound(row7);
}
[Benchmark]
public void SimdUtils_FastRound_Vector8_Grouped()
{
ref Block8x8F b = ref this.block;
ref Vector<float> row0 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V0L);
ref Vector<float> row1 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V1L);
ref Vector<float> row2 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V2L);
ref Vector<float> row3 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V3L);
row0 = SimdUtils.FastRound(row0);
row1 = SimdUtils.FastRound(row1);
row2 = SimdUtils.FastRound(row2);
row3 = SimdUtils.FastRound(row3);
row0 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V4L);
row1 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V5L);
row2 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V6L);
row3 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V7L);
row0 = SimdUtils.FastRound(row0);
row1 = SimdUtils.FastRound(row1);
row2 = SimdUtils.FastRound(row2);
row3 = SimdUtils.FastRound(row3);
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Benchmark]
public void Sse41_V1()
{
ref Vector128<float> b0 = ref Unsafe.As<Block8x8F, Vector128<float>>(ref this.block);
ref Vector128<float> p = ref b0;
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 1);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 2);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 3);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 4);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 5);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 6);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 7);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 8);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 9);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 10);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 11);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 12);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 13);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 14);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 15);
p = Sse41.RoundToNearestInteger(p);
}
[Benchmark]
public unsafe void Sse41_V2()
{
ref Vector128<float> p = ref Unsafe.As<Block8x8F, Vector128<float>>(ref this.block);
p = Sse41.RoundToNearestInteger(p);
var offset = (IntPtr)sizeof(Vector128<float>);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
}
[Benchmark]
public unsafe void Sse41_V3()
{
ref Vector128<float> p = ref Unsafe.As<Block8x8F, Vector128<float>>(ref this.block);
p = Sse41.RoundToNearestInteger(p);
var offset = (IntPtr)sizeof(Vector128<float>);
for (int i = 0; i < 15; i++)
{
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
}
}
[Benchmark]
public unsafe void Sse41_V4()
{
ref Vector128<float> p = ref Unsafe.As<Block8x8F, Vector128<float>>(ref this.block);
var offset = (IntPtr)sizeof(Vector128<float>);
ref Vector128<float> a = ref p;
ref Vector128<float> b = ref Unsafe.AddByteOffset(ref a, offset);
ref Vector128<float> c = ref Unsafe.AddByteOffset(ref b, offset);
ref Vector128<float> d = ref Unsafe.AddByteOffset(ref c, offset);
a = Sse41.RoundToNearestInteger(a);
b = Sse41.RoundToNearestInteger(b);
c = Sse41.RoundToNearestInteger(c);
d = Sse41.RoundToNearestInteger(d);
a = ref Unsafe.AddByteOffset(ref d, offset);
b = ref Unsafe.AddByteOffset(ref a, offset);
c = ref Unsafe.AddByteOffset(ref b, offset);
d = ref Unsafe.AddByteOffset(ref c, offset);
a = Sse41.RoundToNearestInteger(a);
b = Sse41.RoundToNearestInteger(b);
c = Sse41.RoundToNearestInteger(c);
d = Sse41.RoundToNearestInteger(d);
a = ref Unsafe.AddByteOffset(ref d, offset);
b = ref Unsafe.AddByteOffset(ref a, offset);
c = ref Unsafe.AddByteOffset(ref b, offset);
d = ref Unsafe.AddByteOffset(ref c, offset);
a = Sse41.RoundToNearestInteger(a);
b = Sse41.RoundToNearestInteger(b);
c = Sse41.RoundToNearestInteger(c);
d = Sse41.RoundToNearestInteger(d);
a = ref Unsafe.AddByteOffset(ref d, offset);
b = ref Unsafe.AddByteOffset(ref a, offset);
c = ref Unsafe.AddByteOffset(ref b, offset);
d = ref Unsafe.AddByteOffset(ref c, offset);
a = Sse41.RoundToNearestInteger(a);
b = Sse41.RoundToNearestInteger(b);
c = Sse41.RoundToNearestInteger(c);
d = Sse41.RoundToNearestInteger(d);
}
[Benchmark]
public unsafe void Sse41_V5_Unaligned()
{
float* p = this.alignedPtr + 1;
Vector128<float> v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
}
[Benchmark]
public unsafe void Sse41_V5_Aligned()
{
float* p = this.alignedPtr;
Vector128<float> v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
}
[Benchmark]
public void Sse41_V6_Aligned()
{
float* p = this.alignedPtr;
Round8SseVectors(p);
Round8SseVectors(p + 32);
}
private static void Round8SseVectors(float* p0)
{
float* p1 = p0 + 4;
float* p2 = p1 + 4;
float* p3 = p2 + 4;
float* p4 = p3 + 4;
float* p5 = p4 + 4;
float* p6 = p5 + 4;
float* p7 = p6 + 4;
Vector128<float> v0 = Sse.LoadAlignedVector128(p0);
Vector128<float> v1 = Sse.LoadAlignedVector128(p1);
Vector128<float> v2 = Sse.LoadAlignedVector128(p2);
Vector128<float> v3 = Sse.LoadAlignedVector128(p3);
Vector128<float> v4 = Sse.LoadAlignedVector128(p4);
Vector128<float> v5 = Sse.LoadAlignedVector128(p5);
Vector128<float> v6 = Sse.LoadAlignedVector128(p6);
Vector128<float> v7 = Sse.LoadAlignedVector128(p7);
v0 = Sse41.RoundToNearestInteger(v0);
v1 = Sse41.RoundToNearestInteger(v1);
v2 = Sse41.RoundToNearestInteger(v2);
v3 = Sse41.RoundToNearestInteger(v3);
v4 = Sse41.RoundToNearestInteger(v4);
v5 = Sse41.RoundToNearestInteger(v5);
v6 = Sse41.RoundToNearestInteger(v6);
v7 = Sse41.RoundToNearestInteger(v7);
Sse.StoreAligned(p0, v0);
Sse.StoreAligned(p1, v1);
Sse.StoreAligned(p2, v2);
Sse.StoreAligned(p3, v3);
Sse.StoreAligned(p4, v4);
Sse.StoreAligned(p5, v5);
Sse.StoreAligned(p6, v6);
Sse.StoreAligned(p7, v7);
}
#endif
}
}