Browse Source

BulkConvertByteToNormalizedFloat

af/merge-core
Anton Firszov 8 years ago
parent
commit
189d602fb7
  1. 75
      src/ImageSharp/Common/Extensions/SimdUtils.cs
  2. 12
      src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs
  3. 31
      tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
  4. 1384
      tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs

75
src/ImageSharp/Common/Extensions/SimdUtils.cs

@ -7,6 +7,8 @@ using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp namespace SixLabors.ImageSharp
{ {
/// <summary> /// <summary>
@ -103,6 +105,47 @@ namespace SixLabors.ImageSharp
} }
} }
/// <summary>
/// Converts each byte of <paramref name="source"/> into a normalized float (approximately value / 255)
/// and stores it at the same index in <paramref name="dest"/>, processing 8 elements per iteration.
/// </summary>
/// <param name="source">The bytes to convert. Must contain at least <c>dest.Length</c> elements.</param>
/// <param name="dest">The destination floats. Its length must be divisible by 8.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <see cref="Vector"/> operations are not hardware accelerated,
/// or when <see cref="Vector{T}"/> of <see cref="float"/> does not hold exactly 8 elements.
/// </exception>
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
    // Every iteration widens one 8-byte octet and reinterprets the result as a single Vector<T>.
    const int OctetSize = 8;

    // The reinterpretation below is only valid when a Vector<float> is exactly one octet wide.
    // With any other width the loop would stride through 'dest' incorrectly and leave parts of it unconverted.
    if (!Vector.IsHardwareAccelerated || Vector<float>.Count != OctetSize)
    {
        throw new InvalidOperationException(
            "SimdUtils.BulkConvertByteToNormalizedFloat() should only be called when Vector.IsHardwareAccelerated == true and Vector<float>.Count == 8!");
    }

    DebugGuard.IsTrue((dest.Length % OctetSize) == 0, nameof(dest), "dest.Length should be divisible by Vector<float>.Count!");

    // The source is read through unchecked references, so it must cover every element that gets written.
    DebugGuard.IsTrue(source.Length >= dest.Length, nameof(source), "source.Length should be at least dest.Length!");

    // The conversion avoids an int -> float instruction:
    // OR-ing a byte value b into the low mantissa bits of 32768.0f yields the float 32768 + b / 256.
    // Subtracting 32768 leaves b / 256, and multiplying by 256 / 255 rescales that to b / 255.
    var bVec = new Vector<float>(256.0f / 255.0f);
    var magicFloat = new Vector<float>(32768.0f);
    var magicInt = new Vector<uint>(1191182336); // reinterpreted value of 32768.0f
    var mask = new Vector<uint>(255);

    ref Octet.OfByte sourceBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(source));
    ref Octet.OfUInt32 destBaseAsWideOctet = ref Unsafe.As<float, Octet.OfUInt32>(ref MemoryMarshal.GetReference(dest));
    ref Vector<float> destBaseAsFloat = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref destBaseAsWideOctet);

    int n = dest.Length / OctetSize;
    Octet.OfUInt32 temp = default;

    for (int i = 0; i < n; i++)
    {
        Octet.OfByte sVal = Unsafe.Add(ref sourceBase, i);

        // This call is the bottleneck now:
        temp.LoadFrom(ref sVal);

        Vector<uint> vi = Unsafe.As<Octet.OfUInt32, Vector<uint>>(ref temp);
        vi &= mask;
        vi |= magicInt;

        var vf = Vector.AsVectorSingle(vi);
        vf = (vf - magicFloat) * bVec;

        Unsafe.Add(ref destBaseAsFloat, i) = vf;
    }
}
/// <summary> /// <summary>
/// Same as <see cref="BulkConvertNormalizedFloatToByte"/> but clamps overflown values before conversion. /// Same as <see cref="BulkConvertNormalizedFloatToByte"/> but clamps overflown values before conversion.
/// </summary> /// </summary>
@ -181,6 +224,19 @@ namespace SixLabors.ImageSharp
{ {
return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]"; return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
} }
/// <summary>
/// Copies the eight values of <paramref name="src"/> into the matching
/// <c>V0</c>..<c>V7</c> fields of this instance, one field at a time.
/// </summary>
/// <param name="src">The octet of bytes to read from. It is passed by reference but is not modified here.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void LoadFrom(ref OfByte src)
{
// NOTE(review): presumably each byte is implicitly widened to the wider field type of this struct — confirm against the field declarations.
this.V0 = src.V0;
this.V1 = src.V1;
this.V2 = src.V2;
this.V3 = src.V3;
this.V4 = src.V4;
this.V5 = src.V5;
this.V6 = src.V6;
this.V7 = src.V7;
}
} }
[StructLayout(LayoutKind.Explicit, Size = 8)] [StructLayout(LayoutKind.Explicit, Size = 8)]
@ -215,16 +271,17 @@ namespace SixLabors.ImageSharp
return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]"; return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
} }
public void LoadFrom(ref OfUInt32 i) [MethodImpl(InliningOptions.ShortMethod)]
public void LoadFrom(ref OfUInt32 src)
{ {
this.V0 = (byte)i.V0; this.V0 = (byte)src.V0;
this.V1 = (byte)i.V1; this.V1 = (byte)src.V1;
this.V2 = (byte)i.V2; this.V2 = (byte)src.V2;
this.V3 = (byte)i.V3; this.V3 = (byte)src.V3;
this.V4 = (byte)i.V4; this.V4 = (byte)src.V4;
this.V5 = (byte)i.V5; this.V5 = (byte)src.V5;
this.V6 = (byte)i.V6; this.V6 = (byte)src.V6;
this.V7 = (byte)i.V7; this.V7 = (byte)src.V7;
} }
} }
} }

12
src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs

@ -57,14 +57,14 @@ namespace SixLabors.ImageSharp.PixelFormats
int unpackedRawCount = count * 4; int unpackedRawCount = count * 4;
ref uint sourceBase = ref Unsafe.As<Rgba32, uint>(ref MemoryMarshal.GetReference(sourceColors)); ref uint sourceBase = ref Unsafe.As<Rgba32, uint>(ref MemoryMarshal.GetReference(sourceColors));
ref UnpackedRGBA destBaseAsUnpacked = ref Unsafe.As<Vector4, UnpackedRGBA>(ref MemoryMarshal.GetReference(destVectors)); ref WideRgba destBaseAsWide = ref Unsafe.As<Vector4, WideRgba>(ref MemoryMarshal.GetReference(destVectors));
ref Vector<uint> destBaseAsUInt = ref Unsafe.As<UnpackedRGBA, Vector<uint>>(ref destBaseAsUnpacked); ref Vector<uint> destBaseAsUInt = ref Unsafe.As<WideRgba, Vector<uint>>(ref destBaseAsWide);
ref Vector<float> destBaseAsFloat = ref Unsafe.As<UnpackedRGBA, Vector<float>>(ref destBaseAsUnpacked); ref Vector<float> destBaseAsFloat = ref Unsafe.As<WideRgba, Vector<float>>(ref destBaseAsWide);
for (int i = 0; i < count; i++) for (int i = 0; i < count; i++)
{ {
uint sVal = Unsafe.Add(ref sourceBase, i); uint sVal = Unsafe.Add(ref sourceBase, i);
ref UnpackedRGBA dst = ref Unsafe.Add(ref destBaseAsUnpacked, i); ref WideRgba dst = ref Unsafe.Add(ref destBaseAsWide, i);
// This call is the bottleneck now: // This call is the bottleneck now:
dst.Load(sVal); dst.Load(sVal);
@ -174,10 +174,10 @@ namespace SixLabors.ImageSharp.PixelFormats
} }
/// <summary> /// <summary>
/// Value type to store <see cref="Rgba32"/>-s unpacked into multiple <see cref="uint"/>-s. /// Value type to store <see cref="Rgba32"/>-s widened into multiple <see cref="uint"/>-s.
/// </summary> /// </summary>
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
private struct UnpackedRGBA private struct WideRgba
{ {
private uint r; private uint r;

31
tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs

@ -6,8 +6,13 @@
using System.Buffers; using System.Buffers;
using System; using System;
using System.Numerics; using System.Numerics;
using System.Runtime.InteropServices;
using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Attributes.Jobs;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Jobs;
using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats;
@ -17,11 +22,13 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
public abstract class ToVector4<TPixel> public abstract class ToVector4<TPixel>
where TPixel : struct, IPixel<TPixel> where TPixel : struct, IPixel<TPixel>
{ {
private IMemoryOwner<TPixel> source; protected IMemoryOwner<TPixel> source;
private IMemoryOwner<Vector4> destination; protected IMemoryOwner<Vector4> destination;
[Params(64, 300, 1024)] [Params(
//64,
1024)]
public int Count { get; set; } public int Count { get; set; }
[GlobalSetup] [GlobalSetup]
@ -38,7 +45,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
this.destination.Dispose(); this.destination.Dispose();
} }
[Benchmark(Baseline = true)] [Benchmark]
public void PerElement() public void PerElement()
{ {
Span<TPixel> s = this.source.GetSpan(); Span<TPixel> s = this.source.GetSpan();
@ -51,7 +58,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
} }
} }
[Benchmark] [Benchmark(Baseline = true)]
public void CommonBulk() public void CommonBulk()
{ {
new PixelOperations<TPixel>().ToVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count); new PixelOperations<TPixel>().ToVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
@ -64,7 +71,21 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
} }
} }
[CoreJob]
//[ClrJob]
public class ToVector4_Rgba32 : ToVector4<Rgba32> public class ToVector4_Rgba32 : ToVector4<Rgba32>
{ {
// Empty benchmark configuration: it adds nothing on top of ManualConfig.
// NOTE(review): no reference to this class is visible in this diff — confirm whether it is still needed.
class Config : ManualConfig
{
}
/// <summary>
/// Measures <c>SimdUtils.BulkConvertByteToNormalizedFloat</c> by viewing the source pixels
/// as raw bytes and the destination vectors as raw floats.
/// </summary>
[Benchmark]
public void BulkConvertByteToNormalizedFloat()
{
    Span<Rgba32> pixels = this.source.GetSpan();
    Span<Vector4> vectors = this.destination.GetSpan();

    // Reinterpret both buffers as flat component spans; no data is copied.
    SimdUtils.BulkConvertByteToNormalizedFloat(
        MemoryMarshal.Cast<Rgba32, byte>(pixels),
        MemoryMarshal.Cast<Vector4, float>(vectors));
}
} }
} }

1384
tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs

File diff suppressed because it is too large
Loading…
Cancel
Save