mirror of https://github.com/SixLabors/ImageSharp
committed by
GitHub
22 changed files with 748 additions and 869 deletions
@ -0,0 +1,78 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Six Labors Split License.
|
||||
|
|
||||
|
using System.Runtime.CompilerServices; |
||||
|
using System.Runtime.InteropServices; |
||||
|
|
||||
|
namespace SixLabors.ImageSharp; |
||||
|
|
||||
|
internal static partial class SimdUtils |
||||
|
{ |
||||
|
/// <summary>
/// Converts every <see cref="byte"/> of <paramref name="source"/> into a <see cref="float"/> normalized into [0..1].
/// <paramref name="source"/> should be of the same size as <paramref name="destination"/>,
/// but there are no restrictions on the span's length.
/// </summary>
/// <param name="source">The source span of bytes</param>
/// <param name="destination">The destination span of floats</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> destination)
{
    DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");

    // Both spans are passed by reference; the reduce step is expected to convert a leading
    // part and leave only the unprocessed tail in them (NOTE(review): confirm against HwIntrinsics).
    HwIntrinsics.ByteToNormalizedFloatReduce(ref source, ref destination);

    if (source.IsEmpty)
    {
        return;
    }

    // Whatever is still left in the spans is converted one element at a time.
    ConvertByteToNormalizedFloatRemainder(source, destination);
}
||||
|
|
||||
|
/// <summary>
/// Converts every <see cref="float"/> of <paramref name="source"/>, normalized into [0..1], into a <see cref="byte"/>
/// stored in <paramref name="destination"/>.
/// The values are scaled up into [0-255] and rounded, overflows are clamped.
/// <paramref name="source"/> should be of the same size as <paramref name="destination"/>,
/// but there are no restrictions on the span's length.
/// </summary>
/// <param name="source">The source span of floats</param>
/// <param name="destination">The destination span of bytes</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturate(ReadOnlySpan<float> source, Span<byte> destination)
{
    DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");

    // Both spans are passed by reference; the reduce step is expected to convert a leading
    // part and leave only the unprocessed tail in them (NOTE(review): confirm against HwIntrinsics).
    HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref destination);

    if (source.IsEmpty)
    {
        return;
    }

    // Whatever is still left in the spans is converted one element at a time.
    ConvertNormalizedFloatToByteRemainder(source, destination);
}
||||
|
|
||||
|
/// <summary>
/// Scalar conversion: writes <c>source[i] / 255f</c> to <c>destination[i]</c> for every element of <paramref name="source"/>.
/// </summary>
/// <param name="source">The bytes to convert.</param>
/// <param name="destination">Receives one float per source byte; accessed without bounds checks, so it must be at least as long as <paramref name="source"/>.</param>
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan<byte> source, Span<float> destination)
{
    ref byte sourceRef = ref MemoryMarshal.GetReference(source);
    ref float destinationRef = ref MemoryMarshal.GetReference(destination);
    nuint count = (uint)source.Length;

    for (nuint offset = 0; offset < count; offset++)
    {
        Unsafe.Add(ref destinationRef, offset) = Unsafe.Add(ref sourceRef, offset) / 255f;
    }
}
||||
|
|
||||
|
/// <summary>
/// Scalar conversion: writes <see cref="ConvertToByte"/> of <c>source[i]</c> to <c>destination[i]</c>
/// for every element of <paramref name="source"/>.
/// </summary>
/// <param name="source">The floats to convert.</param>
/// <param name="destination">Receives one byte per source float; accessed without bounds checks, so it must be at least as long as <paramref name="source"/>.</param>
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ConvertNormalizedFloatToByteRemainder(ReadOnlySpan<float> source, Span<byte> destination)
{
    ref float sourceRef = ref MemoryMarshal.GetReference(source);
    ref byte destinationRef = ref MemoryMarshal.GetReference(destination);
    nuint count = (uint)source.Length;

    for (nuint offset = 0; offset < count; offset++)
    {
        Unsafe.Add(ref destinationRef, offset) = ConvertToByte(Unsafe.Add(ref sourceRef, offset));
    }
}
||||
|
|
||||
|
/// <summary>
/// Scales <paramref name="f"/> by 255, adds 0.5, clamps the result to [0..255] and casts it to a <see cref="byte"/>.
/// </summary>
/// <param name="f">The value to convert.</param>
/// <returns>The converted byte.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static byte ConvertToByte(float f)
{
    // Adding 0.5 before the truncating cast rounds to the nearest integer.
    float scaled = (f * 255f) + 0.5f;
    return (byte)Numerics.Clamp(scaled, 0, 255f);
}
||||
|
} |
||||
@ -1,182 +0,0 @@ |
|||||
// Copyright (c) Six Labors.
|
|
||||
// Licensed under the Six Labors Split License.
|
|
||||
|
|
||||
using System.Numerics; |
|
||||
using System.Runtime.CompilerServices; |
|
||||
using System.Runtime.InteropServices; |
|
||||
|
|
||||
// ReSharper disable MemberHidesStaticFromOuterClass
|
|
||||
namespace SixLabors.ImageSharp; |
|
||||
|
|
||||
internal static partial class SimdUtils |
|
||||
{ |
|
||||
/// <summary>
|
|
||||
/// Implementation methods based on newer <see cref="Vector{T}"/> API-s (Vector.Widen, Vector.Narrow, Vector.ConvertTo*).
|
|
||||
/// Only accelerated on RyuJIT having dotnet/coreclr#10662 merged (.NET Core 2.1+ .NET 4.7.2+)
|
|
||||
/// See:
|
|
||||
/// https://github.com/dotnet/coreclr/pull/10662
|
|
||||
/// API Proposal:
|
|
||||
/// https://github.com/dotnet/corefx/issues/15957
|
|
||||
/// </summary>
|
|
||||
public static class ExtendedIntrinsics |
|
||||
{ |
|
||||
/// <summary>
/// Gets a value indicating whether the methods of this class may be used;
/// initialized once from <see cref="Vector.IsHardwareAccelerated"/>.
/// </summary>
public static bool IsAvailable { get; } = Vector.IsHardwareAccelerated;
|
||||
|
|
||||
/// <summary>
/// Widens a vector of <see cref="short"/> values to two vectors of <see cref="int"/>
/// and converts each of them to a vector of <see cref="float"/>-s.
/// </summary>
/// <param name="source">The vector of 16-bit integers.</param>
/// <param name="dest1">Receives the converted first output of <c>Vector.Widen</c>.</param>
/// <param name="dest2">Receives the converted second output of <c>Vector.Widen</c>.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void ConvertToSingle(
    Vector<short> source,
    out Vector<float> dest1,
    out Vector<float> dest2)
{
    Vector<int> low;
    Vector<int> high;
    Vector.Widen(source, out low, out high);

    dest1 = Vector.ConvertToSingle(low);
    dest2 = Vector.ConvertToSingle(high);
}
|
||||
|
|
||||
/// <summary>
/// Runs <see cref="ByteToNormalizedFloat"/> on as many leading elements as possible, then slices
/// both spans so that only the unconverted remainder is left in them.
/// Does nothing when <see cref="IsAvailable"/> is <see langword="false"/>.
/// </summary>
/// <param name="source">The source bytes; replaced by its unconverted tail.</param>
/// <param name="dest">The destination floats; replaced by its unwritten tail.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void ByteToNormalizedFloatReduce(
    ref ReadOnlySpan<byte> source,
    ref Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (IsAvailable)
    {
        // NOTE(review): ModuloP2 presumably yields Length % Vector<byte>.Count for a power-of-two count - confirm.
        int vectorizable = source.Length - Numerics.ModuloP2(source.Length, Vector<byte>.Count);

        if (vectorizable > 0)
        {
            ByteToNormalizedFloat(source.Slice(0, vectorizable), dest.Slice(0, vectorizable));

            source = source.Slice(vectorizable);
            dest = dest.Slice(vectorizable);
        }
    }
}
|
||||
|
|
||||
/// <summary>
/// Runs <see cref="NormalizedFloatToByteSaturate"/> on as many leading elements as possible, then slices
/// both spans so that only the unconverted remainder is left in them.
/// Does nothing when <see cref="IsAvailable"/> is <see langword="false"/>.
/// </summary>
/// <param name="source">The source floats; replaced by its unconverted tail.</param>
/// <param name="dest">The destination bytes; replaced by its unwritten tail.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
    ref ReadOnlySpan<float> source,
    ref Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (IsAvailable)
    {
        // NOTE(review): ModuloP2 presumably yields Length % Vector<byte>.Count for a power-of-two count - confirm.
        int vectorizable = source.Length - Numerics.ModuloP2(source.Length, Vector<byte>.Count);

        if (vectorizable > 0)
        {
            NormalizedFloatToByteSaturate(source.Slice(0, vectorizable), dest.Slice(0, vectorizable));

            source = source.Slice(vectorizable);
            dest = dest.Slice(vectorizable);
        }
    }
}
|
||||
|
|
||||
/// <summary>
/// <see cref="Vector{T}"/> based implementation of <see cref="SimdUtils.ByteToNormalizedFloat"/>.
/// Each <see cref="Vector{T}"/> of bytes is widened twice (byte -> ushort -> uint) and
/// converted into four vectors of floats scaled by 1/255.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of floats.</param>
internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
    // NOTE(review): presumably checks equal lengths that are a multiple of Vector<byte>.Count - confirm.
    VerifySpanInput(source, dest, Vector<byte>.Count);

    // NOTE(review): presumably dest.Length / Vector<byte>.Count, i.e. the number of byte vectors to process - confirm.
    nuint vectorCount = dest.VectorCount<byte>();

    ref Vector<byte> sourceRef = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(source));
    ref Vector<float> destRef = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dest));

    for (nuint i = 0; i < vectorCount; i++)
    {
        Vector<byte> bytes = Unsafe.Add(ref sourceRef, i);

        Vector.Widen(bytes, out Vector<ushort> lowShorts, out Vector<ushort> highShorts);
        Vector.Widen(lowShorts, out Vector<uint> q0, out Vector<uint> q1);
        Vector.Widen(highShorts, out Vector<uint> q2, out Vector<uint> q3);

        // One vector of bytes expands into four consecutive vectors of floats.
        nuint destIndex = i * 4;
        Unsafe.Add(ref destRef, destIndex) = ConvertToSingle(q0);
        Unsafe.Add(ref destRef, destIndex + 1) = ConvertToSingle(q1);
        Unsafe.Add(ref destRef, destIndex + 2) = ConvertToSingle(q2);
        Unsafe.Add(ref destRef, destIndex + 3) = ConvertToSingle(q3);
    }
}
|
||||
|
|
||||
/// <summary>
/// <see cref="Vector{T}"/> based implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>.
/// Every four vectors of floats are scaled, clamped and converted to integers,
/// then narrowed twice (uint -> ushort -> byte) into a single vector of bytes.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of bytes.</param>
internal static void NormalizedFloatToByteSaturate(
    ReadOnlySpan<float> source,
    Span<byte> dest)
{
    // NOTE(review): presumably checks equal lengths that are a multiple of Vector<byte>.Count - confirm.
    VerifySpanInput(source, dest, Vector<byte>.Count);

    // NOTE(review): presumably dest.Length / Vector<byte>.Count, i.e. the number of byte vectors to produce - confirm.
    nuint vectorCount = dest.VectorCount<byte>();

    ref Vector<float> sourceRef = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
    ref Vector<byte> destRef = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(dest));

    for (nuint i = 0; i < vectorCount; i++)
    {
        // Four consecutive vectors of floats collapse into one vector of bytes.
        nuint sourceIndex = i * 4;
        Vector<uint> q0 = ConvertToUInt32(Unsafe.Add(ref sourceRef, sourceIndex));
        Vector<uint> q1 = ConvertToUInt32(Unsafe.Add(ref sourceRef, sourceIndex + 1));
        Vector<uint> q2 = ConvertToUInt32(Unsafe.Add(ref sourceRef, sourceIndex + 2));
        Vector<uint> q3 = ConvertToUInt32(Unsafe.Add(ref sourceRef, sourceIndex + 3));

        Vector<ushort> lowShorts = Vector.Narrow(q0, q1);
        Vector<ushort> highShorts = Vector.Narrow(q2, q3);

        Unsafe.Add(ref destRef, i) = Vector.Narrow(lowShorts, highShorts);
    }
}
|
||||
|
|
||||
/// <summary>
/// Scales each lane by 255, adds 0.5, clamps it to [0..255] and converts it to an integer,
/// returning the result reinterpreted as a vector of <see cref="uint"/>.
/// </summary>
/// <param name="vf">The vector of floats to convert.</param>
/// <returns>The converted vector.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<uint> ConvertToUInt32(Vector<float> vf)
{
    Vector<float> max = new(255f);

    Vector<float> scaled = (vf * max) + new Vector<float>(0.5f);
    Vector<float> clamped = Vector.Min(Vector.Max(scaled, Vector<float>.Zero), max);

    // After clamping no lane is negative, so viewing the signed result as unsigned keeps the values.
    return Vector.AsVectorUInt32(Vector.ConvertToInt32(clamped));
}
|
||||
|
|
||||
/// <summary>
/// Reinterprets the lanes of <paramref name="u"/> as <see cref="int"/>, converts them to
/// <see cref="float"/> and multiplies each by 1/255.
/// </summary>
/// <param name="u">The vector of unsigned integers to convert.</param>
/// <returns>The scaled vector of floats.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> ConvertToSingle(Vector<uint> u)
{
    Vector<float> asFloat = Vector.ConvertToSingle(Vector.AsVectorInt32(u));
    return asFloat * new Vector<float>(1f / 255f);
}
|
||||
} |
|
||||
} |
|
||||
@ -1,144 +0,0 @@ |
|||||
// Copyright (c) Six Labors.
|
|
||||
// Licensed under the Six Labors Split License.
|
|
||||
|
|
||||
using System.Numerics; |
|
||||
using System.Runtime.CompilerServices; |
|
||||
using System.Runtime.InteropServices; |
|
||||
|
|
||||
// ReSharper disable MemberHidesStaticFromOuterClass
|
|
||||
namespace SixLabors.ImageSharp; |
|
||||
|
|
||||
internal static partial class SimdUtils |
|
||||
{ |
|
||||
/// <summary>
|
|
||||
/// Fallback implementation based on <see cref="Vector4"/> (128bit).
|
|
||||
/// For <see cref="Vector4"/>, efficient software fallback implementations are present,
|
|
||||
/// and we hope that even mono's JIT is able to emit SIMD instructions for that type :P
|
|
||||
/// </summary>
|
|
||||
public static class FallbackIntrinsics128 |
|
||||
{ |
|
||||
/// <summary>
/// Runs <see cref="ByteToNormalizedFloat"/> on as many leading elements as possible, then slices
/// both spans so that only the unconverted remainder is left in them.
/// </summary>
/// <param name="source">The source bytes; replaced by its unconverted tail.</param>
/// <param name="dest">The destination floats; replaced by its unwritten tail.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void ByteToNormalizedFloatReduce(
    ref ReadOnlySpan<byte> source,
    ref Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    // NOTE(review): Modulo4 presumably yields Length % 4 - confirm.
    int vectorizable = source.Length - Numerics.Modulo4(source.Length);

    if (vectorizable <= 0)
    {
        return;
    }

    ByteToNormalizedFloat(source.Slice(0, vectorizable), dest.Slice(0, vectorizable));

    source = source.Slice(vectorizable);
    dest = dest.Slice(vectorizable);
}
|
||||
|
|
||||
/// <summary>
/// Runs <see cref="NormalizedFloatToByteSaturate"/> on as many leading elements as possible, then slices
/// both spans so that only the unconverted remainder is left in them.
/// </summary>
/// <param name="source">The source floats; replaced by its unconverted tail.</param>
/// <param name="dest">The destination bytes; replaced by its unwritten tail.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
    ref ReadOnlySpan<float> source,
    ref Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    // NOTE(review): Modulo4 presumably yields Length % 4 - confirm.
    int vectorizable = source.Length - Numerics.Modulo4(source.Length);

    if (vectorizable <= 0)
    {
        return;
    }

    NormalizedFloatToByteSaturate(source.Slice(0, vectorizable), dest.Slice(0, vectorizable));

    source = source.Slice(vectorizable);
    dest = dest.Slice(vectorizable);
}
|
||||
|
|
||||
/// <summary>
/// Implementation of <see cref="SimdUtils.ByteToNormalizedFloat"/> using <see cref="Vector4"/>:
/// processes four bytes per iteration, multiplying them by 1/255.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of floats.</param>
[MethodImpl(InliningOptions.ColdPath)]
internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
    // NOTE(review): presumably checks equal lengths that are a multiple of 4 - confirm.
    VerifySpanInput(source, dest, 4);

    uint quadCount = (uint)dest.Length / 4;
    if (quadCount == 0)
    {
        return;
    }

    ref ByteVector4 sourceRef = ref Unsafe.As<byte, ByteVector4>(ref MemoryMarshal.GetReference(source));
    ref Vector4 destRef = ref Unsafe.As<float, Vector4>(ref MemoryMarshal.GetReference(dest));

    const float scale = 1f / 255f;

    for (nuint i = 0; i < quadCount; i++)
    {
        ref ByteVector4 quad = ref Unsafe.Add(ref sourceRef, i);
        Unsafe.Add(ref destRef, i) = new Vector4(quad.X, quad.Y, quad.Z, quad.W) * scale;
    }
}
|
||||
|
|
||||
/// <summary>
/// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/> using <see cref="Vector4"/>:
/// processes four floats per iteration, scaling them by 255, adding 0.5, clamping to [0..255]
/// and casting each component to a <see cref="byte"/>.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of bytes.</param>
[MethodImpl(InliningOptions.ColdPath)]
internal static void NormalizedFloatToByteSaturate(
    ReadOnlySpan<float> source,
    Span<byte> dest)
{
    // NOTE(review): presumably checks equal lengths that are a multiple of 4 - confirm.
    VerifySpanInput(source, dest, 4);

    uint quadCount = (uint)source.Length / 4;
    if (quadCount == 0)
    {
        return;
    }

    ref Vector4 sourceRef = ref Unsafe.As<float, Vector4>(ref MemoryMarshal.GetReference(source));
    ref ByteVector4 destRef = ref Unsafe.As<byte, ByteVector4>(ref MemoryMarshal.GetReference(dest));

    Vector4 max = new(255f);
    Vector4 rounding = new(0.5f);

    for (nuint i = 0; i < quadCount; i++)
    {
        Vector4 scaled = (Unsafe.Add(ref sourceRef, i) * max) + rounding;
        Vector4 clamped = Numerics.Clamp(scaled, Vector4.Zero, max);

        ref ByteVector4 target = ref Unsafe.Add(ref destRef, i);
        target.X = (byte)clamped.X;
        target.Y = (byte)clamped.Y;
        target.Z = (byte)clamped.Z;
        target.W = (byte)clamped.W;
    }
}
|
||||
|
|
||||
/// <summary>
/// Four bytes laid out sequentially; byte spans are reinterpreted as this struct
/// so that four consecutive elements can be read or written as named components.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
private struct ByteVector4
{
    /// <summary>The first byte.</summary>
    public byte X;

    /// <summary>The second byte.</summary>
    public byte Y;

    /// <summary>The third byte.</summary>
    public byte Z;

    /// <summary>The fourth byte.</summary>
    public byte W;
}
|
||||
} |
|
||||
} |
|
||||
Loading…
Reference in new issue