mirror of https://github.com/SixLabors/ImageSharp
561 changed files with 40098 additions and 17247 deletions
@ -0,0 +1,103 @@ |
|||
// Copyright (c) Six Labors and contributors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
using SixLabors.ImageSharp.Processing.Processors.Dithering; |
|||
using SixLabors.ImageSharp.Processing.Processors.Quantization; |
|||
|
|||
namespace SixLabors.ImageSharp.Advanced |
|||
{ |
|||
/// <summary>
|
|||
/// Unlike traditional Mono/.NET, code on the iPhone is statically compiled ahead of time instead of being
|
|||
/// compiled on demand by a JIT compiler. This means there are a few limitations with respect to generics,
|
|||
/// these are caused because not every possible generic instantiation can be determined up front at compile time.
|
|||
/// The Aot Compiler is designed to overcome the limitations of this compiler.
|
|||
/// </summary>
|
|||
public static class AotCompilerTools |
|||
{ |
|||
/// <summary>
/// Seeds the AoT compiler with the generic code paths needed for the given pixel format.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
public static void Seed<TPixel>()
    where TPixel : struct, IPixel<TPixel>
{
    // This is where the individual seeding helpers are invoked for TPixel:
    // the two quantizers first, then the dithering engine.
    AotCompileOctreeQuantizer<TPixel>();
    AotCompileWuQuantizer<TPixel>();
    AotCompileDithering<TPixel>();

    // TODO: Do the discovery work to figure out what works and what doesn't.
}
|||
|
|||
/// <summary>
/// Seeds the AoT compiler for two pixel formats, one after the other.
/// </summary>
/// <typeparam name="TPixel">The first pixel format.</typeparam>
/// <typeparam name="TPixel2">The second pixel format.</typeparam>
public static void Seed<TPixel, TPixel2>()
    where TPixel : struct, IPixel<TPixel>
    where TPixel2 : struct, IPixel<TPixel2>
{
    // Delegate to the single-format overload, first format first.
    Seed<TPixel>();
    Seed<TPixel2>();
}
|||
|
|||
/// <summary>
/// Seeds the AoT compiler for three pixel formats, one after the other.
/// </summary>
/// <typeparam name="TPixel">The first pixel format.</typeparam>
/// <typeparam name="TPixel2">The second pixel format.</typeparam>
/// <typeparam name="TPixel3">The third pixel format.</typeparam>
public static void Seed<TPixel, TPixel2, TPixel3>()
    where TPixel : struct, IPixel<TPixel>
    where TPixel2 : struct, IPixel<TPixel2>
    where TPixel3 : struct, IPixel<TPixel3>
{
    // Same order as chaining through the two-format overload: 1st, 2nd, 3rd.
    Seed<TPixel>();
    Seed<TPixel2>();
    Seed<TPixel3>();
}
|||
|
|||
/// <summary>
/// This method exists only to force ahead-of-time compilation of the octree quantizer for
/// <typeparamref name="TPixel"/>.
/// If you are running ImageSharp on iOS and try to call SaveAsGif, it will throw an exception:
/// "Attempting to JIT compile method... OctreeFrameQuantizer.ConstructPalette... while running in aot-only mode."
/// The reason this happens is that the SaveAsGif method makes heavy use of generics, which are too confusing
/// for the AoT compiler used on Xamarin.iOS. It spins up the JIT compiler to try and figure it out, but that
/// is an illegal operation on iOS, so it bombs out.
/// If you are getting the above error, you need to call this method, which will pre-seed the AoT compiler
/// with the necessary methods to complete the SaveAsGif call. Otherwise you should NEVER need this method!
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
private static void AotCompileOctreeQuantizer<TPixel>()
    where TPixel : struct, IPixel<TPixel>
{
    var quantizer = new OctreeQuantizer(false);
    var frameQuantizer = new OctreeFrameQuantizer<TPixel>(quantizer);

    // Touching the palette path is what makes the generic instantiation visible to the AoT compiler.
    frameQuantizer.AotGetPalette();
}
|||
|
|||
/// <summary>
/// This method pre-seeds the WuQuantizer in the AoT compiler for iOS.
/// </summary>
/// <remarks>
/// A 1x1 frame is quantized purely so that the generic code paths are instantiated;
/// the frame is disposed again before the method returns.
/// </remarks>
/// <typeparam name="TPixel">The pixel format.</typeparam>
private static void AotCompileWuQuantizer<TPixel>()
    where TPixel : struct, IPixel<TPixel>
{
    var test = new WuFrameQuantizer<TPixel>(new WuQuantizer(false));

    // The throw-away frame owns pixel memory, so release it deterministically
    // instead of leaving it for the garbage collector.
    using (var frame = new ImageFrame<TPixel>(Configuration.Default, 1, 1))
    {
        test.QuantizeFrame(frame);
        test.AotGetPalette();
    }
}
|||
|
|||
/// <summary>
/// This method pre-seeds the default dithering engine (FloydSteinbergDiffuser) in the AoT compiler for iOS.
/// </summary>
/// <remarks>
/// A single dither call is made against a 1x1 frame with default pixel values; the frame is disposed
/// again before the method returns.
/// </remarks>
/// <typeparam name="TPixel">The pixel format.</typeparam>
private static void AotCompileDithering<TPixel>()
    where TPixel : struct, IPixel<TPixel>
{
    var test = new FloydSteinbergDiffuser();
    TPixel pixel = default;

    // The throw-away frame owns pixel memory, so release it deterministically
    // instead of leaving it for the garbage collector.
    using (var frame = new ImageFrame<TPixel>(Configuration.Default, 1, 1))
    {
        test.Dither<TPixel>(frame, pixel, pixel, 0, 0, 0, 0, 0, 0);
    }
}
|||
} |
|||
} |
|||
@ -1,232 +0,0 @@ |
|||
// Copyright (c) Six Labors and contributors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Diagnostics; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
/// <summary>
|
|||
/// Various extension and utility methods for <see cref="Vector4"/> and <see cref="Vector{T}"/> utilizing SIMD capabilities
|
|||
/// </summary>
|
|||
internal static class SimdUtils |
|||
{ |
|||
/// <summary>
/// Gets a value indicating whether the code is being executed on an AVX2 CPU,
/// i.e. both the float and the integer <see cref="Vector{T}"/> hold 8 elements (256 bit registers).
/// </summary>
public static bool IsAvx2CompatibleArchitecture
{
    get { return Vector<float>.Count == 8 && Vector<int>.Count == 8; }
}
|||
|
|||
/// <summary>
/// Throws when the current architecture does not satisfy <see cref="IsAvx2CompatibleArchitecture"/>.
/// </summary>
/// <param name="operation">Name of the operation, used as the subject of the exception message.</param>
/// <exception cref="NotSupportedException">The architecture is not AVX2 compatible.</exception>
internal static void GuardAvx2(string operation)
{
    if (IsAvx2CompatibleArchitecture)
    {
        return;
    }

    throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
}
|||
|
|||
/// <summary>
/// Transform all scalars in 'v' in a way that converting them to <see cref="int"/> would have rounding semantics.
/// </summary>
/// <param name="v">The vector.</param>
/// <returns>'v' with half of its value clamped to [-1, 1] added to every component.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector4 PseudoRound(this Vector4 v)
{
    // Clamping to [-1, 1] gives +1 / -1 for components with magnitude >= 1
    // and leaves smaller components as they are.
    Vector4 clamped = Vector4.Clamp(v, -Vector4.One, Vector4.One);
    Vector4 offset = clamped * 0.5f;

    return v + offset;
}
|||
|
|||
/// <summary>
/// Rounds all values in 'x' to the nearest integer following <see cref="MidpointRounding.ToEven"/> semantics.
/// Source:
/// <see>
///     <cref>https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110</cref>
/// </see>
/// </summary>
/// <param name="x">The vector to round.</param>
/// <returns>The rounded vector.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector<float> FastRound(this Vector<float> x)
{
    // 0x80000000 in every lane: the sign bit of a 32 bit float.
    Vector<float> signMask = Vector.AsVectorSingle(new Vector<int>(int.MinValue));

    // 8388608 == 2^23 carrying the sign of 'x'. Adding and then subtracting it
    // leaves no room for fractional bits in the intermediate sum.
    Vector<float> signedMagic = (signMask & x) | new Vector<float>(8388608.0f);

    return (x + signedMagic) - signedMagic;
}
|||
|
|||
/// <summary>
/// Convert 'source.Length' <see cref="float"/> values normalized into [0..1] from 'source' into 'dest' buffer of <see cref="byte"/> values.
/// The values are scaled up into [0-255] and rounded.
/// Based on:
/// <see>
///     <cref>http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions</cref>
/// </see>
/// </summary>
/// <param name="source">The normalized input values; the length is expected to be a multiple of <see cref="Vector{T}.Count"/>.</param>
/// <param name="dest">The buffer receiving one byte per input value.</param>
/// <exception cref="NotSupportedException">The architecture is not AVX2 compatible.</exception>
internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest)
{
    GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));

    DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");

    if (source.IsEmpty)
    {
        return;
    }

    ref Vector<float> sourceVectors = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
    ref Octet.OfByte destOctets = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
    int octetCount = source.Length / 8;

    Vector<float> bias = new Vector<float>(32768.0f);
    Vector<float> factor = new Vector<float>(255f) / new Vector<float>(256f);

    // need to copy to a temporary struct, because
    // SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
    // does not work. TODO: This might be a CoreClr bug, need to ask/report
    var scratch = default(Octet.OfUInt32);
    ref Vector<float> scratchAsVector = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref scratch);

    for (int k = 0; k < octetCount; k++)
    {
        // Scalar equivalent of the trick:
        // union { float f; uint32_t i; } u;
        // u.f = 32768.0f + x * (255.0f / 256.0f);
        // return (uint8_t)u.i;
        scratchAsVector = (Unsafe.Add(ref sourceVectors, k) * factor) + bias;

        // Keeps the low byte of each 32 bit lane.
        Unsafe.Add(ref destOctets, k).LoadFrom(ref scratch);
    }
}
|||
|
|||
/// <summary>
/// Same as <see cref="BulkConvertNormalizedFloatToByte"/> but clamps overflown values before conversion.
/// </summary>
/// <param name="source">The normalized input values; the length is expected to be a multiple of <see cref="Vector{T}.Count"/>.</param>
/// <param name="dest">The buffer receiving one byte per input value.</param>
/// <exception cref="NotSupportedException">The architecture is not AVX2 compatible.</exception>
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
{
    // Report this method's own name when the AVX2 guard fails.
    GuardAvx2(nameof(BulkConvertNormalizedFloatToByteClampOverflows));

    DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");

    if (source.Length == 0)
    {
        return;
    }

    ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
    ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
    int n = source.Length / 8;

    Vector<float> magick = new Vector<float>(32768.0f);
    Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);

    // need to copy to a temporary struct, because
    // SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
    // does not work. TODO: This might be a CoreClr bug, need to ask/report
    var temp = default(Octet.OfUInt32);
    ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);

    for (int i = 0; i < n; i++)
    {
        // union { float f; uint32_t i; } u;
        // u.f = 32768.0f + x * (255.0f / 256.0f);
        // return (uint8_t)u.i;
        Vector<float> x = Unsafe.Add(ref srcBase, i);

        // Clamp into [0, 1] first so out-of-range input cannot wrap around in the low byte.
        x = Vector.Max(x, Vector<float>.Zero);
        x = Vector.Min(x, Vector<float>.One);

        x = (x * scale) + magick;
        tempRef = x;

        // Keeps the low byte of each 32 bit lane.
        ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
        d.LoadFrom(ref temp);
    }
}
|||
|
|||
// TODO: Replace these with T4-d library level tuples!

/// <summary>
/// Holds fixed-size tuple structs of eight elements each.
/// </summary>
internal static class Octet
{
    /// <summary>
    /// Eight <see cref="uint"/> values stored back to back (32 bytes).
    /// </summary>
    [StructLayout(LayoutKind.Explicit, Size = 32)]
    public struct OfUInt32
    {
        [FieldOffset(0)]
        public uint V0;

        [FieldOffset(4)]
        public uint V1;

        [FieldOffset(8)]
        public uint V2;

        [FieldOffset(12)]
        public uint V3;

        [FieldOffset(16)]
        public uint V4;

        [FieldOffset(20)]
        public uint V5;

        [FieldOffset(24)]
        public uint V6;

        [FieldOffset(28)]
        public uint V7;

        /// <summary>
        /// Formats the eight values as "[V0,V1,...,V7]".
        /// </summary>
        /// <returns>The formatted string.</returns>
        public override string ToString()
        {
            return string.Format(
                "[{0},{1},{2},{3},{4},{5},{6},{7}]",
                this.V0,
                this.V1,
                this.V2,
                this.V3,
                this.V4,
                this.V5,
                this.V6,
                this.V7);
        }
    }

    /// <summary>
    /// Eight <see cref="byte"/> values stored back to back (8 bytes).
    /// </summary>
    [StructLayout(LayoutKind.Explicit, Size = 8)]
    public struct OfByte
    {
        [FieldOffset(0)]
        public byte V0;

        [FieldOffset(1)]
        public byte V1;

        [FieldOffset(2)]
        public byte V2;

        [FieldOffset(3)]
        public byte V3;

        [FieldOffset(4)]
        public byte V4;

        [FieldOffset(5)]
        public byte V5;

        [FieldOffset(6)]
        public byte V6;

        [FieldOffset(7)]
        public byte V7;

        /// <summary>
        /// Formats the eight values as "[V0,V1,...,V7]".
        /// </summary>
        /// <returns>The formatted string.</returns>
        public override string ToString()
        {
            return string.Format(
                "[{0},{1},{2},{3},{4},{5},{6},{7}]",
                this.V0,
                this.V1,
                this.V2,
                this.V3,
                this.V4,
                this.V5,
                this.V6,
                this.V7);
        }

        /// <summary>
        /// Fills this octet by casting each element of 'i' down to <see cref="byte"/>.
        /// </summary>
        /// <param name="i">The octet of 32 bit values to read from.</param>
        public void LoadFrom(ref OfUInt32 i)
        {
            this.V0 = (byte)i.V0;
            this.V1 = (byte)i.V1;
            this.V2 = (byte)i.V2;
            this.V3 = (byte)i.V3;
            this.V4 = (byte)i.V4;
            this.V5 = (byte)i.V5;
            this.V6 = (byte)i.V6;
            this.V7 = (byte)i.V7;
        }
    }
}
|||
} |
|||
} |
|||
@ -0,0 +1,215 @@ |
|||
// Copyright (c) Six Labors and contributors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Diagnostics; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.Tuples; |
|||
|
|||
// ReSharper disable MemberHidesStaticFromOuterClass
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
internal static partial class SimdUtils |
|||
{ |
|||
/// <summary>
|
|||
/// Implementation with 256bit / AVX2 intrinsics NOT depending on newer API-s (Vector.Widen etc.)
|
|||
/// </summary>
|
|||
public static class BasicIntrinsics256 |
|||
{ |
|||
/// <summary>
/// Gets a value indicating whether this implementation can be used.
/// Initialized once from <see cref="IsAvx2CompatibleArchitecture"/>.
/// </summary>
public static bool IsAvailable { get; } =
    IsAvx2CompatibleArchitecture;
|||
|
|||
/// <summary>
/// Runs <see cref="BulkConvertByteToNormalizedFloat"/> on as many elements as possible (the largest prefix
/// accepted by <see cref="ImageMaths.Modulo8"/>), then slices both spans down so only the remainder is left.
/// Does nothing when <see cref="IsAvailable"/> is false.
/// </summary>
/// <param name="source">The source bytes; replaced by the unprocessed tail.</param>
/// <param name="dest">The destination floats; replaced by the unprocessed tail.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce(
    ref ReadOnlySpan<byte> source,
    ref Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (IsAvailable)
    {
        int bulkCount = source.Length - ImageMaths.Modulo8(source.Length);

        if (bulkCount > 0)
        {
            ReadOnlySpan<byte> bulkSource = source.Slice(0, bulkCount);
            Span<float> bulkDest = dest.Slice(0, bulkCount);
            BulkConvertByteToNormalizedFloat(bulkSource, bulkDest);

            // Hand the leftover elements back to the caller.
            source = source.Slice(bulkCount);
            dest = dest.Slice(bulkCount);
        }
    }
}
|||
|
|||
/// <summary>
/// Runs <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> on as many elements as possible (the largest
/// prefix accepted by <see cref="ImageMaths.Modulo8"/>), then slices both spans down so only the remainder is left.
/// Does nothing when <see cref="IsAvailable"/> is false.
/// </summary>
/// <param name="source">The source floats; replaced by the unprocessed tail.</param>
/// <param name="dest">The destination bytes; replaced by the unprocessed tail.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
    ref ReadOnlySpan<float> source,
    ref Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (IsAvailable)
    {
        int bulkCount = source.Length - ImageMaths.Modulo8(source.Length);

        if (bulkCount > 0)
        {
            ReadOnlySpan<float> bulkSource = source.Slice(0, bulkCount);
            Span<byte> bulkDest = dest.Slice(0, bulkCount);
            BulkConvertNormalizedFloatToByteClampOverflows(bulkSource, bulkDest);

            // Hand the leftover elements back to the caller.
            source = source.Slice(bulkCount);
            dest = dest.Slice(bulkCount);
        }
    }
}
|||
|
|||
/// <summary>
/// SIMD optimized implementation for <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/>.
/// Works only with span Length divisible by 8.
/// Implementation adapted from:
/// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions
/// http://stackoverflow.com/a/536278
/// </summary>
/// <param name="source">The source bytes.</param>
/// <param name="dest">The destination buffer receiving one float per source byte.</param>
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
    VerifyIsAvx2Compatible(nameof(BulkConvertByteToNormalizedFloat));
    VerifySpanInput(source, dest, 8);

    var rescale = new Vector<float>(256.0f / 255.0f);
    var bias = new Vector<float>(32768.0f);
    var biasBits = new Vector<uint>(1191182336); // reinterpreted value of 32768.0f
    var lowByteMask = new Vector<uint>(255);

    ref Octet.OfByte sourceOctets = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(source));
    ref Octet.OfUInt32 destOctets = ref Unsafe.As<float, Octet.OfUInt32>(ref MemoryMarshal.GetReference(dest));

    // Second view of the same destination memory, one Vector<float> per octet.
    ref Vector<float> destVectors = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref destOctets);

    int octetCount = dest.Length / 8;

    // Pass 1: load every source octet into the 32 bit lanes of the destination.
    for (int k = 0; k < octetCount; k++)
    {
        Unsafe.Add(ref destOctets, k).LoadFrom(ref Unsafe.Add(ref sourceOctets, k));
    }

    // Pass 2: rewrite the destination in place, lane bits -> normalized float.
    for (int k = 0; k < octetCount; k++)
    {
        ref Vector<float> slot = ref Unsafe.Add(ref destVectors, k);

        // Keep the low 8 bits of each lane and OR in the bit pattern of 32768.0f.
        Vector<uint> bits = (Vector.AsVectorUInt32(slot) & lowByteMask) | biasBits;

        // Remove the bias again and rescale by 256 / 255.
        slot = (Vector.AsVectorSingle(bits) - bias) * rescale;
    }
}
|||
|
|||
/// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> which is faster on older runtimes.
/// </summary>
/// <param name="source">The normalized source floats.</param>
/// <param name="dest">The destination buffer receiving one byte per source value.</param>
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
{
    VerifyIsAvx2Compatible(nameof(BulkConvertNormalizedFloatToByteClampOverflows));
    VerifySpanInput(source, dest, 8);

    if (source.IsEmpty)
    {
        return;
    }

    ref Vector<float> sourceVectors = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
    ref Octet.OfByte destOctets = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
    int octetCount = source.Length / 8;

    Vector<float> bias = new Vector<float>(32768.0f);
    Vector<float> factor = new Vector<float>(255f) / new Vector<float>(256f);

    // need to copy to a temporary struct, because
    // SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
    // does not work. TODO: This might be a CoreClr bug, need to ask/report
    var scratch = default(Octet.OfUInt32);
    ref Vector<float> scratchAsVector = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref scratch);

    for (int k = 0; k < octetCount; k++)
    {
        // Clamp the input into [0, 1] before applying the conversion trick.
        Vector<float> clamped = Vector.Max(Unsafe.Add(ref sourceVectors, k), Vector<float>.Zero);
        clamped = Vector.Min(clamped, Vector<float>.One);

        // Scalar equivalent of the trick:
        // union { float f; uint32_t i; } u;
        // u.f = 32768.0f + x * (255.0f / 256.0f);
        // return (uint8_t)u.i;
        scratchAsVector = (clamped * factor) + bias;

        Unsafe.Add(ref destOctets, k).LoadFrom(ref scratch);
    }
}
|||
|
|||
/// <summary>
/// Convert all <see cref="float"/> values normalized into [0..1] from 'source'
/// into 'dest' buffer of <see cref="byte"/>. The values are scaled up into [0-255] and rounded.
/// This implementation is SIMD optimized and works only when span Length is divisible by 8.
/// No clamping is applied to the input.
/// Based on:
/// <see>
///     <cref>http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions</cref>
/// </see>
/// </summary>
/// <param name="source">The normalized source floats.</param>
/// <param name="dest">The destination buffer receiving one byte per source value.</param>
internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest)
{
    VerifyIsAvx2Compatible(nameof(BulkConvertNormalizedFloatToByte));
    VerifySpanInput(source, dest, 8);

    if (source.IsEmpty)
    {
        return;
    }

    ref Vector<float> sourceVectors = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
    ref Octet.OfByte destOctets = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
    int octetCount = source.Length / 8;

    Vector<float> bias = new Vector<float>(32768.0f);
    Vector<float> factor = new Vector<float>(255f) / new Vector<float>(256f);

    // need to copy to a temporary struct, because
    // SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
    // does not work. TODO: This might be a CoreClr bug, need to ask/report
    var scratch = default(Octet.OfUInt32);
    ref Vector<float> scratchAsVector = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref scratch);

    for (int k = 0; k < octetCount; k++)
    {
        // Scalar equivalent of the trick:
        // union { float f; uint32_t i; } u;
        // u.f = 32768.0f + x * (255.0f / 256.0f);
        // return (uint8_t)u.i;
        scratchAsVector = (Unsafe.Add(ref sourceVectors, k) * factor) + bias;

        Unsafe.Add(ref destOctets, k).LoadFrom(ref scratch);
    }
}
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,190 @@ |
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
// ReSharper disable MemberHidesStaticFromOuterClass
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
internal static partial class SimdUtils |
|||
{ |
|||
/// <summary>
|
|||
/// Implementation methods based on newer <see cref="Vector{T}"/> API-s (Vector.Widen, Vector.Narrow, Vector.ConvertTo*).
|
|||
/// Accelerated only on RyuJIT having dotnet/coreclr#10662 merged (.NET Core 2.1+ .NET 4.7.2+)
|
|||
/// See:
|
|||
/// https://github.com/dotnet/coreclr/pull/10662
|
|||
/// API Proposal:
|
|||
/// https://github.com/dotnet/corefx/issues/15957
|
|||
/// </summary>
|
|||
public static class ExtendedIntrinsics |
|||
{ |
|||
/// <summary>
/// Gets a value indicating whether this implementation can be used: <see cref="Vector.IsHardwareAccelerated"/>
/// when compiled with SUPPORTS_EXTENDED_INTRINSICS, otherwise always false.
/// </summary>
#if SUPPORTS_EXTENDED_INTRINSICS
public static bool IsAvailable { get; } = Vector.IsHardwareAccelerated;
#else
public static bool IsAvailable { get; } = false;
#endif
|
|||
|
|||
/// <summary>
/// Widen and convert a vector of <see cref="short"/> values into 2 vectors of <see cref="float"/>-s.
/// </summary>
/// <param name="source">The vector of 16 bit integers.</param>
/// <param name="dest1">Receives the converted first output of <see cref="Vector.Widen(Vector{short}, out Vector{int}, out Vector{int})"/>.</param>
/// <param name="dest2">Receives the converted second output of the same widening.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void ConvertToSingle(
    Vector<short> source,
    out Vector<float> dest1,
    out Vector<float> dest2)
{
    Vector<int> first;
    Vector<int> second;
    Vector.Widen(source, out first, out second);

    dest1 = Vector.ConvertToSingle(first);
    dest2 = Vector.ConvertToSingle(second);
}
|||
|
|||
/// <summary>
/// Runs <see cref="BulkConvertByteToNormalizedFloat"/> on as many elements as possible (the length minus its
/// remainder relative to <see cref="Vector{T}.Count"/> of byte), then slices both spans down so only the
/// remainder is left. Does nothing when <see cref="IsAvailable"/> is false.
/// </summary>
/// <param name="source">The source bytes; replaced by the unprocessed tail.</param>
/// <param name="dest">The destination floats; replaced by the unprocessed tail.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce(
    ref ReadOnlySpan<byte> source,
    ref Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (IsAvailable)
    {
        int bulkCount = source.Length - ImageMaths.ModuloP2(source.Length, Vector<byte>.Count);

        if (bulkCount > 0)
        {
            ReadOnlySpan<byte> bulkSource = source.Slice(0, bulkCount);
            Span<float> bulkDest = dest.Slice(0, bulkCount);
            BulkConvertByteToNormalizedFloat(bulkSource, bulkDest);

            // Hand the leftover elements back to the caller.
            source = source.Slice(bulkCount);
            dest = dest.Slice(bulkCount);
        }
    }
}
|||
|
|||
/// <summary>
/// Runs <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> on as many elements as possible (the length
/// minus its remainder relative to <see cref="Vector{T}.Count"/> of byte), then slices both spans down so only
/// the remainder is left. Does nothing when <see cref="IsAvailable"/> is false.
/// </summary>
/// <param name="source">The source floats; replaced by the unprocessed tail.</param>
/// <param name="dest">The destination bytes; replaced by the unprocessed tail.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
    ref ReadOnlySpan<float> source,
    ref Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (IsAvailable)
    {
        int bulkCount = source.Length - ImageMaths.ModuloP2(source.Length, Vector<byte>.Count);

        if (bulkCount > 0)
        {
            ReadOnlySpan<float> bulkSource = source.Slice(0, bulkCount);
            Span<byte> bulkDest = dest.Slice(0, bulkCount);
            BulkConvertNormalizedFloatToByteClampOverflows(bulkSource, bulkDest);

            // Hand the leftover elements back to the caller.
            source = source.Slice(bulkCount);
            dest = dest.Slice(bulkCount);
        }
    }
}
|||
|
|||
/// <summary>
/// Implementation <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
/// </summary>
/// <param name="source">The source bytes.</param>
/// <param name="dest">The destination buffer receiving one float per source byte.</param>
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
    VerifySpanInput(source, dest, Vector<byte>.Count);

    ref Vector<byte> sourceVectors = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(source));
    ref Vector<float> destVectors = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dest));

    int blockCount = dest.Length / Vector<byte>.Count;

    for (int block = 0; block < blockCount; block++)
    {
        // byte -> ushort -> uint: one byte vector fans out into four uint vectors.
        Vector.Widen(Unsafe.Add(ref sourceVectors, block), out Vector<ushort> lower, out Vector<ushort> upper);
        Vector.Widen(lower, out Vector<uint> q0, out Vector<uint> q1);
        Vector.Widen(upper, out Vector<uint> q2, out Vector<uint> q3);

        // Each byte vector produces four consecutive float vectors in the destination.
        ref Vector<float> target = ref Unsafe.Add(ref destVectors, block * 4);
        target = ConvertToSingle(q0);
        Unsafe.Add(ref target, 1) = ConvertToSingle(q1);
        Unsafe.Add(ref target, 2) = ConvertToSingle(q2);
        Unsafe.Add(ref target, 3) = ConvertToSingle(q3);
    }
}
|||
|
|||
/// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime.
/// </summary>
/// <param name="source">The normalized source floats.</param>
/// <param name="dest">The destination buffer receiving one byte per source value.</param>
internal static void BulkConvertNormalizedFloatToByteClampOverflows(
    ReadOnlySpan<float> source,
    Span<byte> dest)
{
    VerifySpanInput(source, dest, Vector<byte>.Count);

    ref Vector<float> sourceVectors = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
    ref Vector<byte> destVectors = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(dest));

    int blockCount = dest.Length / Vector<byte>.Count;

    for (int block = 0; block < blockCount; block++)
    {
        // Four consecutive float vectors are packed into one byte vector.
        ref Vector<float> first = ref Unsafe.Add(ref sourceVectors, block * 4);

        Vector<uint> q0 = ConvertToUInt32(first);
        Vector<uint> q1 = ConvertToUInt32(Unsafe.Add(ref first, 1));
        Vector<uint> q2 = ConvertToUInt32(Unsafe.Add(ref first, 2));
        Vector<uint> q3 = ConvertToUInt32(Unsafe.Add(ref first, 3));

        // uint -> ushort -> byte
        Vector<ushort> lower = Vector.Narrow(q0, q1);
        Vector<ushort> upper = Vector.Narrow(q2, q3);

        Unsafe.Add(ref destVectors, block) = Vector.Narrow(lower, upper);
    }
}
|||
|
|||
/// <summary>
/// Scales normalized floats by 255, adds 0.5, clamps the result into [0, 255]
/// and converts it to 32 bit integers.
/// </summary>
/// <param name="vf">The vector of normalized floats.</param>
/// <returns>The converted values, reinterpreted as a vector of <see cref="uint"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<uint> ConvertToUInt32(Vector<float> vf)
{
    var upperBound = new Vector<float>(255f);
    var half = new Vector<float>(0.5f);

    // Adding 0.5 before the integer conversion gives rounding behaviour for the in-range values.
    Vector<float> shifted = (vf * upperBound) + half;
    Vector<float> clamped = Vector.Min(Vector.Max(shifted, Vector<float>.Zero), upperBound);

    return Vector.AsVectorUInt32(Vector.ConvertToInt32(clamped));
}
|||
|
|||
/// <summary>
/// Converts a vector of <see cref="uint"/> values to floats and multiplies them by 1 / 255.
/// The input is reinterpreted as <see cref="int"/> for the conversion.
/// </summary>
/// <param name="u">The vector of unsigned integers.</param>
/// <returns>The scaled float vector.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> ConvertToSingle(Vector<uint> u)
{
    Vector<float> asFloat = Vector.ConvertToSingle(Vector.AsVectorInt32(u));

    return asFloat * new Vector<float>(1f / 255f);
}
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,151 @@ |
|||
// Copyright (c) Six Labors and contributors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
// ReSharper disable MemberHidesStaticFromOuterClass
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
internal static partial class SimdUtils |
|||
{ |
|||
/// <summary>
|
|||
/// Fallback implementation based on <see cref="Vector4"/> (128bit).
|
|||
/// For <see cref="Vector4"/>, efficient software fallback implementations are present,
|
|||
/// and we hope that even mono's JIT is able to emit SIMD instructions for that type :P
|
|||
/// </summary>
|
|||
public static class FallbackIntrinsics128 |
|||
{ |
|||
/// <summary>
/// Runs <see cref="BulkConvertByteToNormalizedFloat"/> on as many elements as possible (the largest prefix
/// accepted by <see cref="ImageMaths.Modulo4"/>), then slices both spans down so only the remainder is left.
/// </summary>
/// <param name="source">The source bytes; replaced by the unprocessed tail.</param>
/// <param name="dest">The destination floats; replaced by the unprocessed tail.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce(
    ref ReadOnlySpan<byte> source,
    ref Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    int bulkCount = source.Length - ImageMaths.Modulo4(source.Length);

    if (bulkCount <= 0)
    {
        return;
    }

    ReadOnlySpan<byte> bulkSource = source.Slice(0, bulkCount);
    Span<float> bulkDest = dest.Slice(0, bulkCount);
    BulkConvertByteToNormalizedFloat(bulkSource, bulkDest);

    // Hand the leftover elements back to the caller.
    source = source.Slice(bulkCount);
    dest = dest.Slice(bulkCount);
}
|||
|
|||
/// <summary>
/// Runs <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> on as many elements as possible (the largest
/// prefix accepted by <see cref="ImageMaths.Modulo4"/>), then slices both spans down so only the remainder is left.
/// </summary>
/// <param name="source">The source floats; replaced by the unprocessed tail.</param>
/// <param name="dest">The destination bytes; replaced by the unprocessed tail.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
    ref ReadOnlySpan<float> source,
    ref Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    int bulkCount = source.Length - ImageMaths.Modulo4(source.Length);

    if (bulkCount <= 0)
    {
        return;
    }

    ReadOnlySpan<float> bulkSource = source.Slice(0, bulkCount);
    Span<byte> bulkDest = dest.Slice(0, bulkCount);
    BulkConvertNormalizedFloatToByteClampOverflows(bulkSource, bulkDest);

    // Hand the leftover elements back to the caller.
    source = source.Slice(bulkCount);
    dest = dest.Slice(bulkCount);
}
|||
|
|||
/// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/> using <see cref="Vector4"/>.
/// Processes the input four bytes at a time, multiplying each value by 1 / 255.
/// </summary>
/// <param name="source">The source bytes.</param>
/// <param name="dest">The destination buffer receiving one float per source byte.</param>
[MethodImpl(InliningOptions.ColdPath)]
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
    VerifySpanInput(source, dest, 4);

    int quadCount = dest.Length / 4;
    if (quadCount == 0)
    {
        return;
    }

    ref ByteVector4 sourceQuads = ref Unsafe.As<byte, ByteVector4>(ref MemoryMarshal.GetReference(source));
    ref Vector4 destQuads = ref Unsafe.As<float, Vector4>(ref MemoryMarshal.GetReference(dest));

    const float Scale = 1f / 255f;

    for (int k = 0; k < quadCount; k++)
    {
        ref ByteVector4 quad = ref Unsafe.Add(ref sourceQuads, k);

        // Widen the four bytes to floats, then normalize all of them at once.
        Unsafe.Add(ref destQuads, k) = new Vector4(quad.X, quad.Y, quad.Z, quad.W) * Scale;
    }
}
|||
|
|||
/// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> using <see cref="Vector4"/>.
/// Processes the input four floats at a time: scale by 255, add 0.5, clamp into [0, 255], cast to byte.
/// </summary>
/// <param name="source">The normalized source floats.</param>
/// <param name="dest">The destination buffer receiving one byte per source value.</param>
[MethodImpl(InliningOptions.ColdPath)]
internal static void BulkConvertNormalizedFloatToByteClampOverflows(
    ReadOnlySpan<float> source,
    Span<byte> dest)
{
    VerifySpanInput(source, dest, 4);

    int quadCount = source.Length / 4;
    if (quadCount == 0)
    {
        return;
    }

    ref Vector4 sourceQuads = ref Unsafe.As<float, Vector4>(ref MemoryMarshal.GetReference(source));
    ref ByteVector4 destQuads = ref Unsafe.As<byte, ByteVector4>(ref MemoryMarshal.GetReference(dest));

    var upperBound = new Vector4(255f);
    var rounding = new Vector4(0.5f);

    for (int k = 0; k < quadCount; k++)
    {
        Vector4 scaled = (Unsafe.Add(ref sourceQuads, k) * upperBound) + rounding;

        // I'm not sure if Vector4.Clamp() is properly implemented with intrinsics.
        scaled = Vector4.Min(upperBound, Vector4.Max(Vector4.Zero, scaled));

        ref ByteVector4 target = ref Unsafe.Add(ref destQuads, k);
        target.X = (byte)scaled.X;
        target.Y = (byte)scaled.Y;
        target.Z = (byte)scaled.Z;
        target.W = (byte)scaled.W;
    }
}
|||
|
|||
/// <summary>
/// Four bytes declared with sequential layout. The Vector4 based BulkConvert* methods of the enclosing class
/// reinterpret byte spans as this struct (via Unsafe.As) to read or write 4 elements per loop iteration,
/// so the field order X, Y, Z, W must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)] |
|||
private struct ByteVector4 |
|||
{ |
|||
public byte X; |
|||
public byte Y; |
|||
public byte Z; |
|||
public byte W; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,185 @@ |
|||
// Copyright (c) Six Labors and contributors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Diagnostics; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
using SixLabors.ImageSharp.Tuples; |
|||
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
/// <summary>
|
|||
/// Various extension and utility methods for <see cref="Vector4"/> and <see cref="Vector{T}"/> utilizing SIMD capabilities
|
|||
/// </summary>
|
|||
internal static partial class SimdUtils |
|||
{ |
|||
/// <summary>
|
|||
/// Gets a value indicating whether the code is being executed on an AVX2 CPU where both float and integer registers are of size 256 bit.
|
|||
/// The value is true when <see cref="Vector.IsHardwareAccelerated"/> is set and a <see cref="Vector{T}"/> holds 8 elements for both float and int.
/// </summary>
|
|||
public static bool IsAvx2CompatibleArchitecture { get; } = |
|||
Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8; |
|||
|
|||
/// <summary>
/// Offsets every component of <paramref name="v"/> so that converting the result
/// to <see cref="int"/> has rounding semantics.
/// The offset is the component clamped to [-1, 1] and multiplied by 0.5, which is
/// +0.5 or -0.5 for every component with a magnitude of at least 1.
/// </summary>
/// <param name="v">The vector</param>
/// <returns>The vector with the offset applied.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector4 PseudoRound(this Vector4 v)
{
    Vector4 offset = Vector4.Clamp(v, -Vector4.One, Vector4.One) * 0.5f;
    return v + offset;
}
|||
|
|||
/// <summary>
/// Rounds all values in 'v' to the nearest integer following <see cref="MidpointRounding.ToEven"/> semantics.
/// Works by adding and then subtracting 8388608 (2^23) carrying the sign of each element.
/// Source: https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110
/// </summary>
/// <remarks>
/// NOTE(review): presumably only meaningful for magnitudes below 2^23 - confirm against callers.
/// </remarks>
/// <param name="v">The vector</param>
/// <returns>The vector of rounded values.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector<float> FastRound(this Vector<float> v)
{
    // 0x80000000 in every element: the sign bit of a float.
    Vector<float> signMask = Vector.AsVectorSingle(new Vector<int>(int.MinValue));

    // 2^23 with the sign of the matching element of 'v'.
    Vector<float> signedMagic = (signMask & v) | new Vector<float>(8388608.0f);

    return (v + signedMagic) - signedMagic;
}
|||
|
|||
/// <summary>
/// Converts all input <see cref="byte"/>-s to <see cref="float"/>-s normalized into [0..1].
/// <paramref name="source"/> should be of the same size as <paramref name="dest"/>,
/// but there are no restrictions on the span's length.
/// </summary>
/// <remarks>
/// The spans are handed by reference to the intrinsics based "Reduce" steps first;
/// whatever is still left in <paramref name="source"/> afterwards is converted element by element.
/// </remarks>
/// <param name="source">The source span of bytes</param>
/// <param name="dest">The destination span of floats</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

#if SUPPORTS_EXTENDED_INTRINSICS
    ExtendedIntrinsics.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
#else
    BasicIntrinsics256.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
#endif
    FallbackIntrinsics128.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);

    // Nothing left over: the reduce steps consumed the whole input.
    if (source.IsEmpty)
    {
        return;
    }

    ConvertByteToNormalizedFloatRemainder(source, dest);
}
|||
|
|||
/// <summary>
/// Convert all <see cref="float"/> values normalized into [0..1] from 'source' into 'dest' buffer of <see cref="byte"/>.
/// The values are scaled up into [0-255] and rounded, overflows are clamped.
/// <paramref name="source"/> should be of the same size as <paramref name="dest"/>,
/// but there are no restrictions on the span's length.
/// </summary>
/// <remarks>
/// The spans are handed by reference to the intrinsics based "Reduce" steps first;
/// whatever is still left in <paramref name="source"/> afterwards is converted element by element.
/// </remarks>
/// <param name="source">The source span of floats</param>
/// <param name="dest">The destination span of bytes</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

#if SUPPORTS_EXTENDED_INTRINSICS
    ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
#else
    BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
#endif
    FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);

    // Nothing left over: the reduce steps consumed the whole input.
    if (source.IsEmpty)
    {
        return;
    }

    ConvertNormalizedFloatToByteRemainder(source, dest);
}
|||
|
|||
/// <summary>
/// Converts the last 1 to 3 bytes of <paramref name="source"/> into <paramref name="dest"/>, dividing each by 255.
/// Any other length leaves <paramref name="dest"/> untouched.
/// </summary>
/// <param name="source">The remaining source bytes</param>
/// <param name="dest">The remaining destination floats</param>
[MethodImpl(InliningOptions.ColdPath)]
private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan<byte> source, Span<float> dest)
{
    ref byte sourceStart = ref MemoryMarshal.GetReference(source);
    ref float destStart = ref MemoryMarshal.GetReference(dest);

    // At most 3 elements are expected here, so the conversion is written out
    // from the last element down to the first instead of using a loop.
    int remaining = source.Length;
    if (remaining < 1 || remaining > 3)
    {
        return;
    }

    if (remaining == 3)
    {
        Unsafe.Add(ref destStart, 2) = Unsafe.Add(ref sourceStart, 2) / 255f;
    }

    if (remaining >= 2)
    {
        Unsafe.Add(ref destStart, 1) = Unsafe.Add(ref sourceStart, 1) / 255f;
    }

    destStart = sourceStart / 255f;
}
|||
|
|||
/// <summary>
/// Converts the last 1 to 3 floats of <paramref name="source"/> into <paramref name="dest"/> using <c>ConvertToByte</c>.
/// Any other length leaves <paramref name="dest"/> untouched.
/// </summary>
/// <param name="source">The remaining source floats</param>
/// <param name="dest">The remaining destination bytes</param>
[MethodImpl(InliningOptions.ColdPath)]
private static void ConvertNormalizedFloatToByteRemainder(ReadOnlySpan<float> source, Span<byte> dest)
{
    ref float sourceStart = ref MemoryMarshal.GetReference(source);
    ref byte destStart = ref MemoryMarshal.GetReference(dest);

    // Written out from the last element down to the first instead of using a loop.
    int remaining = source.Length;
    if (remaining < 1 || remaining > 3)
    {
        return;
    }

    if (remaining == 3)
    {
        Unsafe.Add(ref destStart, 2) = ConvertToByte(Unsafe.Add(ref sourceStart, 2));
    }

    if (remaining >= 2)
    {
        Unsafe.Add(ref destStart, 1) = ConvertToByte(Unsafe.Add(ref sourceStart, 1));
    }

    destStart = ConvertToByte(sourceStart);
}
|||
|
|||
/// <summary>
/// Scales <paramref name="f"/> by 255, adds 0.5, clamps the result to [0, 255] and casts it to <see cref="byte"/>.
/// </summary>
/// <param name="f">The value to convert</param>
/// <returns>The converted <see cref="byte"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static byte ConvertToByte(float f)
{
    float scaled = (f * 255f) + 0.5f;
    return (byte)ComparableExtensions.Clamp(scaled, 0, 255f);
}
|||
|
|||
/// <summary>
/// Throws when <see cref="IsAvx2CompatibleArchitecture"/> is false.
/// Calls to this method are compiled only when the DEBUG symbol is defined.
/// </summary>
/// <param name="operation">The name of the operation, used as the prefix of the exception message</param>
/// <exception cref="NotSupportedException">The architecture is not AVX2 compatible.</exception>
[Conditional("DEBUG")]
private static void VerifyIsAvx2Compatible(string operation)
{
    if (IsAvx2CompatibleArchitecture)
    {
        return;
    }

    throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
}
|||
|
|||
/// <summary>
/// Guards that both spans have the same length and that the length leaves no remainder
/// for <paramref name="shouldBeDivisibleBy"/>, as computed by <c>ImageMaths.ModuloP2</c>.
/// Calls to this method are compiled only when the DEBUG symbol is defined.
/// </summary>
/// <param name="source">The source span of bytes</param>
/// <param name="dest">The destination span of floats</param>
/// <param name="shouldBeDivisibleBy">The required divisor of the length (presumably a power of two, given the helper's name - confirm against ImageMaths.ModuloP2)</param>
[Conditional("DEBUG")]
private static void VerifySpanInput(ReadOnlySpan<byte> source, Span<float> dest, int shouldBeDivisibleBy)
{
    bool lengthsMatch = source.Length == dest.Length;
    DebugGuard.IsTrue(lengthsMatch, nameof(source), "Input spans must be of same length!");

    int leftover = ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy);
    DebugGuard.IsTrue(leftover == 0, nameof(source), $"length should be divisible by {shouldBeDivisibleBy}!");
}
|||
|
|||
/// <summary>
/// Guards that both spans have the same length and that the length leaves no remainder
/// for <paramref name="shouldBeDivisibleBy"/>, as computed by <c>ImageMaths.ModuloP2</c>.
/// Calls to this method are compiled only when the DEBUG symbol is defined.
/// </summary>
/// <param name="source">The source span of floats</param>
/// <param name="dest">The destination span of bytes</param>
/// <param name="shouldBeDivisibleBy">The required divisor of the length (presumably a power of two, given the helper's name - confirm against ImageMaths.ModuloP2)</param>
[Conditional("DEBUG")]
private static void VerifySpanInput(ReadOnlySpan<float> source, Span<byte> dest, int shouldBeDivisibleBy)
{
    bool lengthsMatch = source.Length == dest.Length;
    DebugGuard.IsTrue(lengthsMatch, nameof(source), "Input spans must be of same length!");

    int leftover = ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy);
    DebugGuard.IsTrue(leftover == 0, nameof(source), $"length should be divisible by {shouldBeDivisibleBy}!");
}
|||
} |
|||
} |
|||
@ -0,0 +1,101 @@ |
|||
// Copyright (c) Six Labors and contributors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Runtime.CompilerServices; |
|||
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
/// <summary>
/// Basic math operations where every equality check is carried out
/// with a tolerance (epsilon) instead of an exact comparison.
/// </summary>
internal readonly struct TolerantMath
{
    private readonly double epsilon;

    private readonly double negEpsilon;

    /// <summary>
    /// Initializes a new instance of the <see cref="TolerantMath"/> struct.
    /// </summary>
    /// <param name="epsilon">The tolerance; guarded by <c>DebugGuard</c> to be greater than 0.</param>
    public TolerantMath(double epsilon)
    {
        DebugGuard.MustBeGreaterThan(epsilon, 0, nameof(epsilon));

        this.negEpsilon = -epsilon;
        this.epsilon = epsilon;
    }

    /// <summary>
    /// Gets the default instance, created with a tolerance of 1e-8.
    /// </summary>
    public static TolerantMath Default { get; } = new TolerantMath(1e-8);

    /// <summary>
    /// <paramref name="a"/> == 0, i.e. <paramref name="a"/> lies strictly between -epsilon and +epsilon.
    /// </summary>
    [MethodImpl(InliningOptions.ShortMethod)]
    public bool IsZero(double a) => this.negEpsilon < a && this.epsilon > a;

    /// <summary>
    /// <paramref name="a"/> &gt; 0
    /// </summary>
    [MethodImpl(InliningOptions.ShortMethod)]
    public bool IsPositive(double a) => this.epsilon < a;

    /// <summary>
    /// <paramref name="a"/> &lt; 0
    /// </summary>
    [MethodImpl(InliningOptions.ShortMethod)]
    public bool IsNegative(double a) => this.negEpsilon > a;

    /// <summary>
    /// <paramref name="a"/> == <paramref name="b"/>
    /// </summary>
    [MethodImpl(InliningOptions.ShortMethod)]
    public bool AreEqual(double a, double b)
    {
        double difference = a - b;
        return this.IsZero(difference);
    }

    /// <summary>
    /// <paramref name="a"/> &gt; <paramref name="b"/>
    /// </summary>
    [MethodImpl(InliningOptions.ShortMethod)]
    public bool IsGreater(double a, double b) => (b + this.epsilon) < a;

    /// <summary>
    /// <paramref name="a"/> &lt; <paramref name="b"/>
    /// </summary>
    [MethodImpl(InliningOptions.ShortMethod)]
    public bool IsLess(double a, double b) => (b - this.epsilon) > a;

    /// <summary>
    /// <paramref name="a"/> &gt;= <paramref name="b"/>
    /// </summary>
    [MethodImpl(InliningOptions.ShortMethod)]
    public bool IsGreaterOrEqual(double a, double b) => (b - this.epsilon) <= a;

    /// <summary>
    /// <paramref name="a"/> &lt;= <paramref name="b"/>
    /// </summary>
    [MethodImpl(InliningOptions.ShortMethod)]
    public bool IsLessOrEqual(double a, double b) => (a - this.epsilon) <= b;

    /// <summary>
    /// Tolerant ceiling: when the IEEE remainder of <paramref name="a"/> divided by 1 is zero
    /// within the tolerance, <paramref name="a"/> is rounded, otherwise <see cref="Math.Ceiling(double)"/> is used.
    /// </summary>
    /// <param name="a">The value</param>
    /// <returns>The resulting integral value.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    public double Ceiling(double a)
        => this.IsZero(Math.IEEERemainder(a, 1)) ? Math.Round(a) : Math.Ceiling(a);

    /// <summary>
    /// Tolerant floor: when the IEEE remainder of <paramref name="a"/> divided by 1 is zero
    /// within the tolerance, <paramref name="a"/> is rounded, otherwise <see cref="Math.Floor(double)"/> is used.
    /// </summary>
    /// <param name="a">The value</param>
    /// <returns>The resulting integral value.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    public double Floor(double a)
        => this.IsZero(Math.IEEERemainder(a, 1)) ? Math.Round(a) : Math.Floor(a);
}
|||
} |
|||
@ -0,0 +1,109 @@ |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
namespace SixLabors.ImageSharp.Tuples |
|||
{ |
|||
/// <summary>
/// Container for value tuples made of 8 elements of various types.
/// </summary>
internal static class Octet
{
    /// <summary>
    /// Tuple of 8 <see cref="uint"/> values placed back to back at explicit offsets.
    /// </summary>
    [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))]
    public struct OfUInt32
    {
        [FieldOffset(0 * sizeof(uint))]
        public uint V0;

        [FieldOffset(1 * sizeof(uint))]
        public uint V1;

        [FieldOffset(2 * sizeof(uint))]
        public uint V2;

        [FieldOffset(3 * sizeof(uint))]
        public uint V3;

        [FieldOffset(4 * sizeof(uint))]
        public uint V4;

        [FieldOffset(5 * sizeof(uint))]
        public uint V5;

        [FieldOffset(6 * sizeof(uint))]
        public uint V6;

        [FieldOffset(7 * sizeof(uint))]
        public uint V7;

        /// <summary>
        /// Formats the tuple as "Octet.OfUInt32(V0,...,V7)".
        /// </summary>
        /// <returns>The string representation.</returns>
        public override string ToString()
            => $"{nameof(Octet)}.{nameof(OfUInt32)}({this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7})";

        /// <summary>
        /// Fills all 8 elements from <paramref name="src"/>, widening each byte to <see cref="uint"/>.
        /// </summary>
        /// <param name="src">The tuple of bytes to read from</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        public void LoadFrom(ref OfByte src)
        {
            this.V0 = src.V0;
            this.V1 = src.V1;
            this.V2 = src.V2;
            this.V3 = src.V3;
            this.V4 = src.V4;
            this.V5 = src.V5;
            this.V6 = src.V6;
            this.V7 = src.V7;
        }
    }

    /// <summary>
    /// Tuple of 8 <see cref="byte"/> values placed back to back at explicit offsets.
    /// </summary>
    [StructLayout(LayoutKind.Explicit, Size = 8)]
    public struct OfByte
    {
        [FieldOffset(0)]
        public byte V0;

        [FieldOffset(1)]
        public byte V1;

        [FieldOffset(2)]
        public byte V2;

        [FieldOffset(3)]
        public byte V3;

        [FieldOffset(4)]
        public byte V4;

        [FieldOffset(5)]
        public byte V5;

        [FieldOffset(6)]
        public byte V6;

        [FieldOffset(7)]
        public byte V7;

        /// <summary>
        /// Formats the tuple as "Octet.OfByte(V0,...,V7)".
        /// </summary>
        /// <returns>The string representation.</returns>
        public override string ToString()
            => $"{nameof(Octet)}.{nameof(OfByte)}({this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7})";

        /// <summary>
        /// Fills all 8 elements from <paramref name="src"/>, narrowing each value with a (byte) cast.
        /// </summary>
        /// <param name="src">The tuple of unsigned integers to read from</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        public void LoadFrom(ref OfUInt32 src)
        {
            this.V0 = (byte)src.V0;
            this.V1 = (byte)src.V1;
            this.V2 = (byte)src.V2;
            this.V3 = (byte)src.V3;
            this.V4 = (byte)src.V4;
            this.V5 = (byte)src.V5;
            this.V6 = (byte)src.V6;
            this.V7 = (byte)src.V7;
        }
    }
}
|||
} |
|||
@ -1,3 +1,10 @@ |
|||
<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation"> |
|||
<s:Boolean x:Key="/Default/CodeInspection/NamespaceProvider/NamespaceFoldersToSkip/=common/@EntryIndexedValue">True</s:Boolean> |
|||
<s:Boolean x:Key="/Default/CodeInspection/NamespaceProvider/NamespaceFoldersToSkip/=common_005Cexceptions/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary> |
|||
<s:Boolean x:Key="/Default/CodeInspection/NamespaceProvider/NamespaceFoldersToSkip/=common_005Cexceptions/@EntryIndexedValue">True</s:Boolean> |
|||
<s:Boolean x:Key="/Default/CodeInspection/NamespaceProvider/NamespaceFoldersToSkip/=pixelformats_005Cgenerated/@EntryIndexedValue">True</s:Boolean> |
|||
<s:Boolean x:Key="/Default/CodeInspection/NamespaceProvider/NamespaceFoldersToSkip/=pixelformats_005Cpackedpixels/@EntryIndexedValue">True</s:Boolean> |
|||
<s:Boolean x:Key="/Default/CodeInspection/NamespaceProvider/NamespaceFoldersToSkip/=pixelformats_005Cpixelimplementations/@EntryIndexedValue">True</s:Boolean> |
|||
<s:Boolean x:Key="/Default/CodeInspection/NamespaceProvider/NamespaceFoldersToSkip/=pixelformats_005Cpixeltypes/@EntryIndexedValue">True</s:Boolean> |
|||
<s:Boolean x:Key="/Default/CodeInspection/NamespaceProvider/NamespaceFoldersToSkip/=pixelformats_005Cutils/@EntryIndexedValue">True</s:Boolean> |
|||
<s:Boolean x:Key="/Default/CodeInspection/NamespaceProvider/NamespaceFoldersToSkip/=processing_005Cprocessors_005Ctransforms_005Cresamplers/@EntryIndexedValue">True</s:Boolean> |
|||
<s:Boolean x:Key="/Default/CodeInspection/NamespaceProvider/NamespaceFoldersToSkip/=processing_005Cprocessors_005Ctransforms_005Cresize/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary> |
|||
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue