|
|
@ -7,6 +7,8 @@ using System.Numerics; |
|
|
using System.Runtime.CompilerServices; |
|
|
using System.Runtime.CompilerServices; |
|
|
using System.Runtime.InteropServices; |
|
|
using System.Runtime.InteropServices; |
|
|
|
|
|
|
|
|
|
|
|
using SixLabors.ImageSharp.PixelFormats; |
|
|
|
|
|
|
|
|
namespace SixLabors.ImageSharp |
|
|
namespace SixLabors.ImageSharp |
|
|
{ |
|
|
{ |
|
|
/// <summary>
|
|
|
/// <summary>
|
|
|
@ -103,6 +105,47 @@ namespace SixLabors.ImageSharp |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest) |
|
|
|
|
|
{ |
|
|
|
|
|
if (!Vector.IsHardwareAccelerated) |
|
|
|
|
|
{ |
|
|
|
|
|
throw new InvalidOperationException( |
|
|
|
|
|
"Rgba32.PixelOperations.ToVector4SimdAligned() should not be called when Vector.IsHardwareAccelerated == false!"); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
DebugGuard.IsTrue((dest.Length % Vector<float>.Count) == 0, nameof(source), "dest.Length should be divisable by Vector<float>.Count!"); |
|
|
|
|
|
|
|
|
|
|
|
var bVec = new Vector<float>(256.0f / 255.0f); |
|
|
|
|
|
var magicFloat = new Vector<float>(32768.0f); |
|
|
|
|
|
var magicInt = new Vector<uint>(1191182336); // reinterpreded value of 32768.0f
|
|
|
|
|
|
var mask = new Vector<uint>(255); |
|
|
|
|
|
|
|
|
|
|
|
ref Octet.OfByte sourceBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(source)); |
|
|
|
|
|
ref Octet.OfUInt32 destBaseAsWideOctet = ref Unsafe.As<float, Octet.OfUInt32>(ref MemoryMarshal.GetReference(dest)); |
|
|
|
|
|
|
|
|
|
|
|
ref Vector<float> destBaseAsFloat = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref destBaseAsWideOctet); |
|
|
|
|
|
|
|
|
|
|
|
int n = dest.Length / 8; |
|
|
|
|
|
Octet.OfUInt32 temp = default; |
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < n; i++) |
|
|
|
|
|
{ |
|
|
|
|
|
Octet.OfByte sVal = Unsafe.Add(ref sourceBase, i); |
|
|
|
|
|
|
|
|
|
|
|
// This call is the bottleneck now:
|
|
|
|
|
|
temp.LoadFrom(ref sVal); |
|
|
|
|
|
|
|
|
|
|
|
Vector<uint> vi = Unsafe.As<Octet.OfUInt32, Vector<uint>>(ref temp); |
|
|
|
|
|
vi &= mask; |
|
|
|
|
|
vi |= magicInt; |
|
|
|
|
|
|
|
|
|
|
|
var vf = Vector.AsVectorSingle(vi); |
|
|
|
|
|
vf = (vf - magicFloat) * bVec; |
|
|
|
|
|
|
|
|
|
|
|
Unsafe.Add(ref destBaseAsFloat, i) = vf; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
/// <summary>
|
|
|
/// Same as <see cref="BulkConvertNormalizedFloatToByte"/> but clamps overflown values before conversion.
|
|
|
/// Same as <see cref="BulkConvertNormalizedFloatToByte"/> but clamps overflown values before conversion.
|
|
|
/// </summary>
|
|
|
/// </summary>
|
|
|
@ -181,6 +224,19 @@ namespace SixLabors.ImageSharp |
|
|
{ |
|
|
{ |
|
|
return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]"; |
|
|
return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]"; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
|
|
public void LoadFrom(ref OfByte src) |
|
|
|
|
|
{ |
|
|
|
|
|
this.V0 = src.V0; |
|
|
|
|
|
this.V1 = src.V1; |
|
|
|
|
|
this.V2 = src.V2; |
|
|
|
|
|
this.V3 = src.V3; |
|
|
|
|
|
this.V4 = src.V4; |
|
|
|
|
|
this.V5 = src.V5; |
|
|
|
|
|
this.V6 = src.V6; |
|
|
|
|
|
this.V7 = src.V7; |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
[StructLayout(LayoutKind.Explicit, Size = 8)] |
|
|
[StructLayout(LayoutKind.Explicit, Size = 8)] |
|
|
@ -215,16 +271,17 @@ namespace SixLabors.ImageSharp |
|
|
return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]"; |
|
|
return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]"; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
public void LoadFrom(ref OfUInt32 i) |
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
|
|
public void LoadFrom(ref OfUInt32 src) |
|
|
{ |
|
|
{ |
|
|
this.V0 = (byte)i.V0; |
|
|
this.V0 = (byte)src.V0; |
|
|
this.V1 = (byte)i.V1; |
|
|
this.V1 = (byte)src.V1; |
|
|
this.V2 = (byte)i.V2; |
|
|
this.V2 = (byte)src.V2; |
|
|
this.V3 = (byte)i.V3; |
|
|
this.V3 = (byte)src.V3; |
|
|
this.V4 = (byte)i.V4; |
|
|
this.V4 = (byte)src.V4; |
|
|
this.V5 = (byte)i.V5; |
|
|
this.V5 = (byte)src.V5; |
|
|
this.V6 = (byte)i.V6; |
|
|
this.V6 = (byte)src.V6; |
|
|
this.V7 = (byte)i.V7; |
|
|
this.V7 = (byte)src.V7; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|