Browse Source

benchmarks & scalar implementation

js/color-alpha-handling
Anton Firszov 5 years ago
parent
commit
e402700a44
  1. 3
      .gitattributes
  2. 99
      src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
  3. 28
      src/ImageSharp/Common/Helpers/SimdUtils.cs
  4. 10
      src/ImageSharp/ImageSharp.csproj
  5. 252
      tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs

3
.gitattributes

@ -80,8 +80,11 @@
*.pvr binary
*.snk binary
*.tga binary
*.tif binary
*.tiff binary
*.ttc binary
*.ttf binary
*.wbmp binary
*.webp binary
*.woff binary
*.woff2 binary

99
src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs

@ -0,0 +1,99 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
/// <summary>
/// Interleaves three planar 8-bit channels into packed <see cref="Rgb24"/> pixels.
/// Whole groups of four pixels are handled by the batched routine; whatever is left
/// over is handled one pixel at a time.
/// </summary>
/// <param name="configuration">The configuration. Not read by this scalar implementation.</param>
/// <param name="redChannel">The red plane. Must hold at least <c>destination.Length</c> elements.</param>
/// <param name="greenChannel">The green plane. Must hold at least <c>destination.Length</c> elements.</param>
/// <param name="blueChannel">The blue plane. Must hold at least <c>destination.Length</c> elements.</param>
/// <param name="destination">The pixels to fill. Its length decides how many pixels are packed.</param>
/// <exception cref="ArgumentException">A channel plane is shorter than <paramref name="destination"/>.</exception>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void PackFromRgbPlanes(
    Configuration configuration,
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgb24> destination)
{
    // Both helpers walk the planes through unchecked references, using the destination
    // length as the element count. A plane that is too short would be read past its end,
    // so reject it up front instead of reading unrelated memory.
    if (redChannel.Length < destination.Length)
    {
        throw new ArgumentException("Channel must contain at least as many elements as the destination.", nameof(redChannel));
    }

    if (greenChannel.Length < destination.Length)
    {
        throw new ArgumentException("Channel must contain at least as many elements as the destination.", nameof(greenChannel));
    }

    if (blueChannel.Length < destination.Length)
    {
        throw new ArgumentException("Channel must contain at least as many elements as the destination.", nameof(blueChannel));
    }

    // The batched step slices all four spans forward past the pixels it completed,
    // so the remainder step only sees the trailing (fewer than four) elements.
    PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
    PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination);
}
/// <summary>
/// Interleaves three planar 8-bit channels into packed <see cref="Rgba32"/> pixels.
/// The source planes carry no alpha information, so every pixel is written fully opaque.
/// </summary>
/// <param name="configuration">The configuration. Not read by this scalar implementation.</param>
/// <param name="redChannel">The red plane. Must hold at least <c>destination.Length</c> elements.</param>
/// <param name="greenChannel">The green plane. Must hold at least <c>destination.Length</c> elements.</param>
/// <param name="blueChannel">The blue plane. Must hold at least <c>destination.Length</c> elements.</param>
/// <param name="destination">The pixels to fill. Its length decides how many pixels are packed.</param>
/// <exception cref="ArgumentException">A channel plane is shorter than <paramref name="destination"/>.</exception>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void PackFromRgbPlanes(
    Configuration configuration,
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgba32> destination)
{
    // The loop below indexes the planes through unchecked references, so validate first.
    if (redChannel.Length < destination.Length)
    {
        throw new ArgumentException("Channel must contain at least as many elements as the destination.", nameof(redChannel));
    }

    if (greenChannel.Length < destination.Length)
    {
        throw new ArgumentException("Channel must contain at least as many elements as the destination.", nameof(greenChannel));
    }

    if (blueChannel.Length < destination.Length)
    {
        throw new ArgumentException("Channel must contain at least as many elements as the destination.", nameof(blueChannel));
    }

    ref byte r = ref MemoryMarshal.GetReference(redChannel);
    ref byte g = ref MemoryMarshal.GetReference(greenChannel);
    ref byte b = ref MemoryMarshal.GetReference(blueChannel);
    ref Rgba32 rgba = ref MemoryMarshal.GetReference(destination);

    // Plain per-element scalar pack, mirroring the Rgb24 remainder routine.
    for (int i = 0; i < destination.Length; i++)
    {
        ref Rgba32 d = ref Unsafe.Add(ref rgba, i);
        d.R = Unsafe.Add(ref r, i);
        d.G = Unsafe.Add(ref g, i);
        d.B = Unsafe.Add(ref b, i);
        d.A = byte.MaxValue;
    }
}
/// <summary>
/// Packs every complete group of four pixels that fits into <paramref name="destination"/>,
/// then moves all four spans forward past the elements that were handled. After the call
/// the spans describe only the trailing elements that did not fill a whole group.
/// </summary>
/// <param name="redChannel">The red plane; replaced by the unprocessed tail on return.</param>
/// <param name="greenChannel">The green plane; replaced by the unprocessed tail on return.</param>
/// <param name="blueChannel">The blue plane; replaced by the unprocessed tail on return.</param>
/// <param name="destination">The pixels to fill; replaced by the unprocessed tail on return.</param>
private static void PackFromRgbPlanesScalarBatchedReduce(
    ref ReadOnlySpan<byte> redChannel,
    ref ReadOnlySpan<byte> greenChannel,
    ref ReadOnlySpan<byte> blueChannel,
    ref Span<Rgb24> destination)
{
    const int PixelsPerBatch = 4;

    // Reinterpret each plane as a run of 4-byte groups so one group is fetched per batch.
    ref ByteTuple4 redGroups = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(redChannel));
    ref ByteTuple4 greenGroups = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(greenChannel));
    ref ByteTuple4 blueGroups = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(blueChannel));
    ref Rgb24 pixels = ref MemoryMarshal.GetReference(destination);

    // The destination length alone decides how many batches run.
    int batchCount = destination.Length / PixelsPerBatch;

    for (int batch = 0; batch < batchCount; batch++)
    {
        ref Rgb24 p0 = ref Unsafe.Add(ref pixels, batch * PixelsPerBatch);
        ref Rgb24 p1 = ref Unsafe.Add(ref p0, 1);
        ref Rgb24 p2 = ref Unsafe.Add(ref p0, 2);
        ref Rgb24 p3 = ref Unsafe.Add(ref p0, 3);

        ref ByteTuple4 red = ref Unsafe.Add(ref redGroups, batch);
        ref ByteTuple4 green = ref Unsafe.Add(ref greenGroups, batch);
        ref ByteTuple4 blue = ref Unsafe.Add(ref blueGroups, batch);

        p0.R = red.V0;
        p0.G = green.V0;
        p0.B = blue.V0;

        p1.R = red.V1;
        p1.G = green.V1;
        p1.B = blue.V1;

        p2.R = red.V2;
        p2.G = green.V2;
        p2.B = blue.V2;

        p3.R = red.V3;
        p3.G = green.V3;
        p3.B = blue.V3;
    }

    // Hand the leftover elements back to the caller through the ref parameters.
    int consumed = batchCount * PixelsPerBatch;
    redChannel = redChannel.Slice(consumed);
    greenChannel = greenChannel.Slice(consumed);
    blueChannel = blueChannel.Slice(consumed);
    destination = destination.Slice(consumed);
}
/// <summary>
/// Packs the planes into <paramref name="destination"/> one pixel at a time.
/// The number of pixels written equals <c>destination.Length</c>; the planes are
/// read through unchecked references at the same indices.
/// </summary>
/// <param name="redChannel">The red plane.</param>
/// <param name="greenChannel">The green plane.</param>
/// <param name="blueChannel">The blue plane.</param>
/// <param name="destination">The pixels to fill.</param>
private static void PackFromRgbPlanesRemainder(
    ReadOnlySpan<byte> redChannel,
    ReadOnlySpan<byte> greenChannel,
    ReadOnlySpan<byte> blueChannel,
    Span<Rgb24> destination)
{
    ref byte redBase = ref MemoryMarshal.GetReference(redChannel);
    ref byte greenBase = ref MemoryMarshal.GetReference(greenChannel);
    ref byte blueBase = ref MemoryMarshal.GetReference(blueChannel);
    ref Rgb24 pixelBase = ref MemoryMarshal.GetReference(destination);

    int index = 0;
    while (index < destination.Length)
    {
        ref Rgb24 pixel = ref Unsafe.Add(ref pixelBase, index);
        pixel.R = Unsafe.Add(ref redBase, index);
        pixel.G = Unsafe.Add(ref greenBase, index);
        pixel.B = Unsafe.Add(ref blueBase, index);
        index++;
    }
}
}
}

28
src/ImageSharp/Common/Helpers/SimdUtils.cs

@ -148,26 +148,6 @@ namespace SixLabors.ImageSharp
}
}
/// <summary>
/// Placeholder overload for packing three byte planes into <see cref="Rgb24"/> pixels.
/// The body contains no statements, so a call leaves <paramref name="destination"/> untouched.
/// </summary>
/// <param name="configuration">The configuration. Not read.</param>
/// <param name="redChannel">The red plane. Not read.</param>
/// <param name="greenChannel">The green plane. Not read.</param>
/// <param name="blueChannel">The blue plane. Not read.</param>
/// <param name="destination">The pixel span. Not written.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void PackFromRgbPlanes(
Configuration configuration,
ReadOnlySpan<byte> redChannel,
ReadOnlySpan<byte> greenChannel,
ReadOnlySpan<byte> blueChannel,
Span<Rgb24> destination)
{
// NOTE(review): empty stub. The surrounding hunk header (-148,26 +148,6) suggests these
// lines are being removed from this file - confirm against the full diff.
}
/// <summary>
/// Placeholder overload for packing three byte planes into <see cref="Rgba32"/> pixels.
/// The body contains no statements, so a call leaves <paramref name="destination"/> untouched.
/// </summary>
/// <param name="configuration">The configuration. Not read.</param>
/// <param name="redChannel">The red plane. Not read.</param>
/// <param name="greenChannel">The green plane. Not read.</param>
/// <param name="blueChannel">The blue plane. Not read.</param>
/// <param name="destination">The pixel span. Not written.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void PackFromRgbPlanes(
Configuration configuration,
ReadOnlySpan<byte> redChannel,
ReadOnlySpan<byte> greenChannel,
ReadOnlySpan<byte> blueChannel,
Span<Rgba32> destination)
{
// NOTE(review): empty stub. The surrounding hunk header (-148,26 +148,6) suggests these
// lines are being removed from this file - confirm against the full diff.
}
[MethodImpl(InliningOptions.ColdPath)]
private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan<byte> source, Span<float> dest)
{
@ -241,5 +221,13 @@ namespace SixLabors.ImageSharp
nameof(source),
$"length should be divisible by {shouldBeDivisibleBy}!");
}
/// <summary>
/// Four consecutive bytes. Byte spans are reinterpreted as this type (via
/// <c>Unsafe.As&lt;byte, ByteTuple4&gt;</c>) so that four elements can be addressed as one group.
/// </summary>
/// <remarks>
/// The field declaration order is significant: V0..V3 are expected to map onto
/// bytes 0..3 of the group, which relies on the struct being laid out sequentially
/// (the C# default for structs).
/// </remarks>
private struct ByteTuple4
{
// First byte of the group.
public byte V0;
// Second byte of the group.
public byte V1;
// Third byte of the group.
public byte V2;
// Fourth byte of the group.
public byte V3;
}
}
}

10
src/ImageSharp/ImageSharp.csproj

@ -25,16 +25,16 @@
</ItemGroup>
<ItemGroup Condition=" $(TargetFramework.StartsWith('netstandard')) OR '$(TargetFramework)' == 'net472'">
<PackageReference Include="System.Numerics.Vectors" Version="4.5.0"/>
<PackageReference Include="System.Numerics.Vectors" Version="4.5.0" />
<PackageReference Include="System.Buffers" Version="4.5.1" />
<PackageReference Include="System.Memory" Version="4.5.4"/>
<PackageReference Include="System.Memory" Version="4.5.4" />
</ItemGroup>
<ItemGroup Condition=" '$(TargetFramework)' == 'netstandard1.3'">
<PackageReference Include="System.IO.Compression" Version="4.3.0"/>
<PackageReference Include="System.IO.Compression" Version="4.3.0" />
<PackageReference Include="System.IO.UnmanagedMemoryStream" Version="4.3.0" />
<PackageReference Include="System.Threading.Tasks.Parallel" Version="4.3.0"/>
<PackageReference Include="System.ValueTuple" Version="4.5.0"/>
<PackageReference Include="System.Threading.Tasks.Parallel" Version="4.3.0" />
<PackageReference Include="System.ValueTuple" Version="4.5.0" />
</ItemGroup>
<ItemGroup>

252
tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs

@ -0,0 +1,252 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion
{
public unsafe class PixelConversion_PackFromRgbPlanes
{
// Planar 8-bit source channels; each is allocated with Count elements in Setup.
private byte[] rBuf;
private byte[] gBuf;
private byte[] bBuf;
// Packed destination written by the Rgb24_* benchmarks.
private Rgb24[] rgbBuf;
// Allocated in Setup; no benchmark visible in this section of the file reads or writes it.
private Rgba32[] rgbaBuf;
// Planar float source channels read by Rgba32_Vector_Float.
private float[] rFloat;
private float[] gFloat;
private float[] bFloat;
// Interleaved float destination for Rgba32_Vector_Float; allocated with Count * 4 elements.
private float[] rgbaFloat;
// Number of elements per channel. BenchmarkDotNet assigns the value(s) listed in [Params].
[Params(512)]
public int Count { get; set; }
/// <summary>
/// Allocates every input and output buffer for the current <see cref="Count"/>.
/// All buffers keep their zero-initialized contents.
/// </summary>
[GlobalSetup]
public void Setup()
{
    int length = this.Count;

    // 8-bit planar inputs.
    this.rBuf = new byte[length];
    this.gBuf = new byte[length];
    this.bBuf = new byte[length];

    // Packed pixel outputs.
    this.rgbBuf = new Rgb24[length];
    this.rgbaBuf = new Rgba32[length];

    // Float planar inputs.
    this.rFloat = new float[length];
    this.gFloat = new float[length];
    this.bFloat = new float[length];

    // Four interleaved float components per element.
    this.rgbaFloat = new float[length * 4];
}
// Per-element packing through pinned raw pointers.
// The [Benchmark] attribute below is commented out, so BenchmarkDotNet does not run this method.
// [Benchmark(Baseline = true)]
public void Rgb24_Scalar_PerElement_Pinned()
{
// Pin all four arrays for the duration of the loop; taking the address of element 0
// means an empty array would throw before anything is pinned.
fixed (byte* r = &this.rBuf[0])
fixed (byte* g = &this.gBuf[0])
fixed (byte* b = &this.bBuf[0])
fixed (Rgb24* rgb = &this.rgbBuf[0])
{
for (int i = 0; i < this.Count; i++)
{
// Write one pixel: copy element i of each plane into the matching component.
Rgb24* d = rgb + i;
d->R = r[i];
d->G = g[i];
d->B = b[i];
}
}
}
/// <summary>
/// Per-element packing using bounds-checked <see cref="Span{T}"/> indexers.
/// </summary>
[Benchmark]
public void Rgb24_Scalar_PerElement_Span()
{
    // Each channel reads its own buffer, so the benchmark streams three distinct
    // source planes instead of re-reading the red plane three times.
    Span<byte> r = this.rBuf;
    Span<byte> g = this.gBuf;
    Span<byte> b = this.bBuf;
    Span<Rgb24> rgb = this.rgbBuf;

    for (int i = 0; i < rgb.Length; i++)
    {
        ref Rgb24 d = ref rgb[i];
        d.R = r[i];
        d.G = g[i];
        d.B = b[i];
    }
}
/// <summary>
/// Baseline: per-element packing through unchecked references (<c>Unsafe.Add</c>).
/// </summary>
[Benchmark(Baseline = true)]
public void Rgb24_Scalar_PerElement_Unsafe()
{
    // Each channel reads its own buffer, so the benchmark streams three distinct
    // source planes instead of re-reading the red plane three times.
    ref byte r = ref this.rBuf[0];
    ref byte g = ref this.gBuf[0];
    ref byte b = ref this.bBuf[0];
    ref Rgb24 rgb = ref this.rgbBuf[0];

    for (int i = 0; i < this.Count; i++)
    {
        ref Rgb24 d = ref Unsafe.Add(ref rgb, i);
        d.R = Unsafe.Add(ref r, i);
        d.G = Unsafe.Add(ref g, i);
        d.B = Unsafe.Add(ref b, i);
    }
}
/// <summary>
/// Scalar packing in groups of eight pixels: each plane is reinterpreted as a run of
/// <c>Byte8</c> groups and one group per plane is consumed per iteration.
/// Only <c>Count / 8</c> whole groups are processed; any trailing elements are skipped.
/// </summary>
[Benchmark]
public void Rgb24_Scalar_PerElement_Batched8()
{
    // Each channel reads its own buffer, so the benchmark streams three distinct
    // source planes instead of re-reading the red plane three times.
    ref Byte8 r = ref Unsafe.As<byte, Byte8>(ref this.rBuf[0]);
    ref Byte8 g = ref Unsafe.As<byte, Byte8>(ref this.gBuf[0]);
    ref Byte8 b = ref Unsafe.As<byte, Byte8>(ref this.bBuf[0]);
    ref Rgb24 rgb = ref this.rgbBuf[0];

    int count = this.Count / 8;
    for (int i = 0; i < count; i++)
    {
        ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 8);
        ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1);
        ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2);
        ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3);
        ref Rgb24 d4 = ref Unsafe.Add(ref d0, 4);
        ref Rgb24 d5 = ref Unsafe.Add(ref d0, 5);
        ref Rgb24 d6 = ref Unsafe.Add(ref d0, 6);
        ref Rgb24 d7 = ref Unsafe.Add(ref d0, 7);

        ref Byte8 rr = ref Unsafe.Add(ref r, i);
        ref Byte8 gg = ref Unsafe.Add(ref g, i);
        ref Byte8 bb = ref Unsafe.Add(ref b, i);

        d0.R = rr.V0;
        d0.G = gg.V0;
        d0.B = bb.V0;

        d1.R = rr.V1;
        d1.G = gg.V1;
        d1.B = bb.V1;

        d2.R = rr.V2;
        d2.G = gg.V2;
        d2.B = bb.V2;

        d3.R = rr.V3;
        d3.G = gg.V3;
        d3.B = bb.V3;

        d4.R = rr.V4;
        d4.G = gg.V4;
        d4.B = bb.V4;

        d5.R = rr.V5;
        d5.G = gg.V5;
        d5.B = bb.V5;

        d6.R = rr.V6;
        d6.G = gg.V6;
        d6.B = bb.V6;

        d7.R = rr.V7;
        d7.G = gg.V7;
        d7.B = bb.V7;
    }
}
/// <summary>
/// Scalar packing in groups of four pixels: each plane is reinterpreted as a run of
/// <c>Byte4</c> groups and one group per plane is consumed per iteration.
/// Only <c>Count / 4</c> whole groups are processed; any trailing elements are skipped.
/// </summary>
[Benchmark]
public void Rgb24_Scalar_PerElement_Batched4()
{
    // Each channel reads its own buffer, so the benchmark streams three distinct
    // source planes instead of re-reading the red plane three times.
    ref Byte4 r = ref Unsafe.As<byte, Byte4>(ref this.rBuf[0]);
    ref Byte4 g = ref Unsafe.As<byte, Byte4>(ref this.gBuf[0]);
    ref Byte4 b = ref Unsafe.As<byte, Byte4>(ref this.bBuf[0]);
    ref Rgb24 rgb = ref this.rgbBuf[0];

    int count = this.Count / 4;
    for (int i = 0; i < count; i++)
    {
        ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4);
        ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1);
        ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2);
        ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3);

        ref Byte4 rr = ref Unsafe.Add(ref r, i);
        ref Byte4 gg = ref Unsafe.Add(ref g, i);
        ref Byte4 bb = ref Unsafe.Add(ref b, i);

        d0.R = rr.V0;
        d0.G = gg.V0;
        d0.B = bb.V0;

        d1.R = rr.V1;
        d1.G = gg.V1;
        d1.B = bb.V1;

        d2.R = rr.V2;
        d2.G = gg.V2;
        d2.B = bb.V2;

        d3.R = rr.V3;
        d3.G = gg.V3;
        d3.B = bb.V3;
    }
}
#if SUPPORTS_RUNTIME_INTRINSICS
// Benchmarks an AVX/AVX2 path that combines the three float planes into the interleaved
// rgbaFloat buffer, four floats per element, with the fourth component taken from a
// constant vector of 1F. Only Count / Vector256<float>.Count whole vectors are processed.
[Benchmark]
public void Rgba32_Vector_Float()
{
// View each float array as a sequence of 256-bit vectors (8 floats each).
ref Vector256<float> rBase = ref Unsafe.As<float, Vector256<float>>(ref this.rFloat[0]);
ref Vector256<float> gBase = ref Unsafe.As<float, Vector256<float>>(ref this.gFloat[0]);
ref Vector256<float> bBase = ref Unsafe.As<float, Vector256<float>>(ref this.bFloat[0]);
ref Vector256<float> resultBase = ref Unsafe.As<float, Vector256<float>>(ref this.rgbaFloat[0]);
int count = this.Count / Vector256<float>.Count;
// Load the permutation control vector from a mask defined outside this file.
// NOTE(review): presumably an even/odd element reordering that compensates for the
// per-128-bit-lane behavior of the unpack instructions - confirm against the mask definition.
ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
// Constant supplying the fourth component of every output element.
var va = Vector256.Create(1F);
for (int i = 0; i < count; i++)
{
// One vector (8 values) per plane per iteration.
Vector256<float> r = Unsafe.Add(ref rBase, i);
Vector256<float> g = Unsafe.Add(ref gBase, i);
Vector256<float> b = Unsafe.Add(ref bBase, i);
r = Avx2.PermuteVar8x32(r, vcontrol);
g = Avx2.PermuteVar8x32(g, vcontrol);
b = Avx2.PermuteVar8x32(b, vcontrol);
// First interleave level: (r, b) pairs and (g, 1) pairs from the low half of each lane.
Vector256<float> vte = Avx.UnpackLow(r, b);
Vector256<float> vto = Avx.UnpackLow(g, va);
// Each input vector expands to four output vectors, hence the i * 4 stride.
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
// Second interleave level: merging the pairs yields r, g, b, 1 sequences.
destination = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
// Repeat both levels for the high half of each lane.
vte = Avx.UnpackHigh(r, b);
vto = Avx.UnpackHigh(g, va);
Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
}
}
#endif
#pragma warning disable SA1132
// Eight consecutive bytes. Rgb24_Scalar_PerElement_Batched8 reinterprets the byte buffers
// as this type via Unsafe.As, so V0..V7 are expected to map onto bytes 0..7 of a group;
// this relies on fields being laid out in declaration order (the C# default for structs).
private struct Byte8
{
public byte V0, V1, V2, V3, V4, V5, V6, V7;
}
// Four consecutive bytes. Rgb24_Scalar_PerElement_Batched4 reinterprets the byte buffers
// as this type via Unsafe.As, so V0..V3 are expected to map onto bytes 0..3 of a group;
// this relies on fields being laid out in declaration order (the C# default for structs).
private struct Byte4
{
public byte V0, V1, V2, V3;
}
#pragma warning restore
}
}
Loading…
Cancel
Save