mirror of https://github.com/SixLabors/ImageSharp
committed by
GitHub
651 changed files with 15620 additions and 14483 deletions
@ -1 +1 @@ |
|||
Subproject commit 353b9afe32a8000410312d17263407cd7bb82d19 |
|||
Subproject commit 1dbfb576c83507645265c79e03369b66cdc0379f |
|||
@ -1,7 +1,4 @@ |
|||
<?xml version="1.0" encoding="utf-8"?> |
|||
<RuleSet Name="ImageSharp" ToolsVersion="17.0"> |
|||
<Include Path="..\shared-infrastructure\sixlabors.ruleset" Action="Default" /> |
|||
<Rules AnalyzerId="StyleCop.Analyzers" RuleNamespace="StyleCop.Analyzers"> |
|||
<Rule Id="SA1011" Action="None" /> |
|||
</Rules> |
|||
</RuleSet> |
|||
@ -1,240 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp; |
|||
|
|||
/// <content>
/// Constructors, <see cref="Vector4"/> conversion operators and internal pixel conversion helpers.
/// </content>
public readonly partial struct Color
{
    /// <summary>
    /// Initializes a new instance of the <see cref="Color"/> struct from a 64 bit RGBA pixel.
    /// </summary>
    /// <param name="pixel">The <see cref="Rgba64"/> containing the color information.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public Color(Rgba64 pixel)
    {
        // The pixel is stored directly; no high precision representation is kept.
        this.boxedHighPrecisionPixel = null;
        this.data = pixel;
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Color"/> struct from a 48 bit RGB pixel.
    /// The alpha component is set to <see cref="ushort.MaxValue"/>.
    /// </summary>
    /// <param name="pixel">The <see cref="Rgb48"/> containing the color information.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public Color(Rgb48 pixel)
    {
        this.boxedHighPrecisionPixel = null;
        this.data = new Rgba64(pixel.R, pixel.G, pixel.B, ushort.MaxValue);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Color"/> struct from a luminance/alpha pixel.
    /// The luminance value is copied into the red, green and blue components.
    /// </summary>
    /// <param name="pixel">The <see cref="La32"/> containing the color information.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public Color(La32 pixel)
    {
        this.boxedHighPrecisionPixel = null;
        this.data = new Rgba64(pixel.L, pixel.L, pixel.L, pixel.A);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Color"/> struct from a luminance pixel.
    /// The packed value is copied into the red, green and blue components and
    /// the alpha component is set to <see cref="ushort.MaxValue"/>.
    /// </summary>
    /// <param name="pixel">The <see cref="L16"/> containing the color information.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public Color(L16 pixel)
    {
        this.boxedHighPrecisionPixel = null;
        this.data = new Rgba64(pixel.PackedValue, pixel.PackedValue, pixel.PackedValue, ushort.MaxValue);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Color"/> struct from an <see cref="Rgba32"/> pixel.
    /// </summary>
    /// <param name="pixel">The <see cref="Rgba32"/> containing the color information.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public Color(Rgba32 pixel)
    {
        this.boxedHighPrecisionPixel = null;
        this.data = new Rgba64(pixel);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Color"/> struct from an <see cref="Argb32"/> pixel.
    /// </summary>
    /// <param name="pixel">The <see cref="Argb32"/> containing the color information.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public Color(Argb32 pixel)
    {
        this.boxedHighPrecisionPixel = null;
        this.data = new Rgba64(pixel);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Color"/> struct from a <see cref="Bgra32"/> pixel.
    /// </summary>
    /// <param name="pixel">The <see cref="Bgra32"/> containing the color information.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public Color(Bgra32 pixel)
    {
        this.boxedHighPrecisionPixel = null;
        this.data = new Rgba64(pixel);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Color"/> struct from an <see cref="Abgr32"/> pixel.
    /// </summary>
    /// <param name="pixel">The <see cref="Abgr32"/> containing the color information.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public Color(Abgr32 pixel)
    {
        this.boxedHighPrecisionPixel = null;
        this.data = new Rgba64(pixel);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Color"/> struct from an <see cref="Rgb24"/> pixel.
    /// </summary>
    /// <param name="pixel">The <see cref="Rgb24"/> containing the color information.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public Color(Rgb24 pixel)
    {
        this.boxedHighPrecisionPixel = null;
        this.data = new Rgba64(pixel);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Color"/> struct from a <see cref="Bgr24"/> pixel.
    /// </summary>
    /// <param name="pixel">The <see cref="Bgr24"/> containing the color information.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public Color(Bgr24 pixel)
    {
        this.boxedHighPrecisionPixel = null;
        this.data = new Rgba64(pixel);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Color"/> struct from a vector.
    /// Each component is clamped into [0..1] and the result is kept as a high precision
    /// <see cref="RgbaVector"/> rather than in the 64 bit data field.
    /// </summary>
    /// <param name="vector">The <see cref="Vector4"/> containing the color information.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public Color(Vector4 vector)
    {
        Vector4 clamped = Numerics.Clamp(vector, Vector4.Zero, Vector4.One);

        this.data = default;
        this.boxedHighPrecisionPixel = new RgbaVector(clamped.X, clamped.Y, clamped.Z, clamped.W);
    }

    /// <summary>
    /// Converts a <see cref="Color"/> to its scaled <see cref="Vector4"/> representation.
    /// </summary>
    /// <param name="color">The <see cref="Color"/> to convert.</param>
    /// <returns>The <see cref="Vector4"/> produced by <see cref="ToScaledVector4"/>.</returns>
    public static explicit operator Vector4(Color color)
    {
        return color.ToScaledVector4();
    }

    /// <summary>
    /// Converts a <see cref="Vector4"/> to a <see cref="Color"/>.
    /// </summary>
    /// <param name="source">The <see cref="Vector4"/> to convert.</param>
    /// <returns>The <see cref="Color"/> created from <paramref name="source"/>.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static explicit operator Color(Vector4 source)
    {
        return new Color(source);
    }

    /// <summary>
    /// Converts this color to <see cref="Rgba32"/>, preferring the high precision pixel when one is stored.
    /// </summary>
    /// <returns>The <see cref="Rgba32"/>.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal Rgba32 ToRgba32()
    {
        if (this.boxedHighPrecisionPixel is { } highPrecision)
        {
            Rgba32 converted = default;
            highPrecision.ToRgba32(ref converted);
            return converted;
        }

        return this.data.ToRgba32();
    }

    /// <summary>
    /// Converts this color to <see cref="Bgra32"/>, preferring the high precision pixel when one is stored.
    /// </summary>
    /// <returns>The <see cref="Bgra32"/>.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal Bgra32 ToBgra32()
    {
        if (this.boxedHighPrecisionPixel is { } highPrecision)
        {
            Bgra32 converted = default;
            converted.FromScaledVector4(highPrecision.ToScaledVector4());
            return converted;
        }

        return this.data.ToBgra32();
    }

    /// <summary>
    /// Converts this color to <see cref="Argb32"/>, preferring the high precision pixel when one is stored.
    /// </summary>
    /// <returns>The <see cref="Argb32"/>.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal Argb32 ToArgb32()
    {
        if (this.boxedHighPrecisionPixel is { } highPrecision)
        {
            Argb32 converted = default;
            converted.FromScaledVector4(highPrecision.ToScaledVector4());
            return converted;
        }

        return this.data.ToArgb32();
    }

    /// <summary>
    /// Converts this color to <see cref="Abgr32"/>, preferring the high precision pixel when one is stored.
    /// </summary>
    /// <returns>The <see cref="Abgr32"/>.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal Abgr32 ToAbgr32()
    {
        if (this.boxedHighPrecisionPixel is { } highPrecision)
        {
            Abgr32 converted = default;
            converted.FromScaledVector4(highPrecision.ToScaledVector4());
            return converted;
        }

        return this.data.ToAbgr32();
    }

    /// <summary>
    /// Converts this color to <see cref="Rgb24"/>, preferring the high precision pixel when one is stored.
    /// </summary>
    /// <returns>The <see cref="Rgb24"/>.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal Rgb24 ToRgb24()
    {
        if (this.boxedHighPrecisionPixel is { } highPrecision)
        {
            Rgb24 converted = default;
            converted.FromScaledVector4(highPrecision.ToScaledVector4());
            return converted;
        }

        return this.data.ToRgb24();
    }

    /// <summary>
    /// Converts this color to <see cref="Bgr24"/>, preferring the high precision pixel when one is stored.
    /// </summary>
    /// <returns>The <see cref="Bgr24"/>.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal Bgr24 ToBgr24()
    {
        if (this.boxedHighPrecisionPixel is { } highPrecision)
        {
            Bgr24 converted = default;
            converted.FromScaledVector4(highPrecision.ToScaledVector4());
            return converted;
        }

        return this.data.ToBgr24();
    }

    /// <summary>
    /// Gets the scaled vector representation of this color, taken from the high precision pixel
    /// when one is stored and from the 64 bit data otherwise.
    /// </summary>
    /// <returns>The <see cref="Vector4"/>.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal Vector4 ToScaledVector4()
    {
        return this.boxedHighPrecisionPixel is null
            ? this.data.ToScaledVector4()
            : this.boxedHighPrecisionPixel.ToScaledVector4();
    }
}
|||
@ -0,0 +1,178 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Buffers.Binary; |
|||
using System.Diagnostics.CodeAnalysis; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using static SixLabors.ImageSharp.SimdUtils; |
|||
|
|||
namespace SixLabors.ImageSharp; |
|||
|
|||
/// <summary>
/// An <see cref="IComponentShuffle"/> implemented by the four-component shufflers in this file.
/// Declares no members beyond those of <see cref="IComponentShuffle"/>.
/// </summary>
internal interface IShuffle4 : IComponentShuffle
{
}
|||
|
|||
/// <summary>
/// General purpose <see cref="IShuffle4"/> driven by an arbitrary shuffle control byte.
/// </summary>
/// <param name="control">The shuffle control byte.</param>
internal readonly struct DefaultShuffle4([ConstantExpected] byte control) : IShuffle4
{
    /// <summary>
    /// Gets the shuffle control byte this instance was created with.
    /// </summary>
    public byte Control { get; } = control;

    /// <summary>
    /// Forwards to <see cref="HwIntrinsics"/> <c>Shuffle4Reduce</c> using <see cref="Control"/>.
    /// </summary>
    /// <param name="source">The source span of bytes, passed by reference.</param>
    /// <param name="destination">The destination span of bytes, passed by reference.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
    {
        // The control is an instance value here, so the constant-expected analyzer is silenced.
#pragma warning disable CA1857 // A constant is expected for the parameter
        HwIntrinsics.Shuffle4Reduce(ref source, ref destination, this.Control);
#pragma warning restore CA1857 // A constant is expected for the parameter
    }

    /// <summary>
    /// Scalar shuffle: for every group of four bytes, copies the source bytes selected by the
    /// decoded control indices into the destination group.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="destination">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        SimdUtils.Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0);

        ref byte src = ref MemoryMarshal.GetReference(source);
        ref byte dst = ref MemoryMarshal.GetReference(destination);
        nuint length = (uint)source.Length;

        // No bounds checks are performed; the loop advances one four byte group at a time.
        nuint offset = 0;
        while (offset < length)
        {
            Unsafe.Add(ref dst, offset) = Unsafe.Add(ref src, p0 + offset);
            Unsafe.Add(ref dst, offset + 1) = Unsafe.Add(ref src, p1 + offset);
            Unsafe.Add(ref dst, offset + 2) = Unsafe.Add(ref src, p2 + offset);
            Unsafe.Add(ref dst, offset + 3) = Unsafe.Add(ref src, p3 + offset);

            offset += 4;
        }
    }
}
|||
|
|||
/// <summary>
/// <see cref="IShuffle4"/> that rotates every 32 bit group left by 8 bits.
/// </summary>
internal readonly struct WXYZShuffle4 : IShuffle4
{
    /// <summary>
    /// Forwards to <see cref="HwIntrinsics"/> <c>Shuffle4Reduce</c> using the <c>MMShuffle2103</c> control.
    /// </summary>
    /// <param name="source">The source span of bytes, passed by reference.</param>
    /// <param name="destination">The destination span of bytes, passed by reference.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
        => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle2103);

    /// <summary>
    /// Scalar shuffle processing <c>source.Length / 4</c> unsigned 32 bit groups.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="destination">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
        ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(destination));
        uint n = (uint)source.Length / 4;

        for (nuint i = 0; i < n; i++)
        {
            uint packed = Unsafe.Add(ref sBase, i);

            // Components listed from most to least significant byte:
            // packed          = [W Z Y X]
            // ROTL(8, packed) = [Z Y X W]
            // Uses BitOperations like the sibling shufflers instead of a hand-written rotate.
            Unsafe.Add(ref dBase, i) = BitOperations.RotateLeft(packed, 8);
        }
    }
}
|||
|
|||
/// <summary>
/// <see cref="IShuffle4"/> that reverses the byte order of every 32 bit group.
/// </summary>
internal readonly struct WZYXShuffle4 : IShuffle4
{
    /// <summary>
    /// Forwards to <see cref="HwIntrinsics"/> <c>Shuffle4Reduce</c> using the <c>MMShuffle0123</c> control.
    /// </summary>
    /// <param name="source">The source span of bytes, passed by reference.</param>
    /// <param name="destination">The destination span of bytes, passed by reference.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
    {
        HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle0123);
    }

    /// <summary>
    /// Scalar shuffle processing <c>source.Length / 4</c> unsigned 32 bit groups.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="destination">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        uint groupCount = (uint)source.Length / 4;
        ref uint src = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
        ref uint dst = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(destination));

        nuint index = 0;
        while (index < groupCount)
        {
            // Components listed from most to least significant byte:
            // group          = [W Z Y X]
            // REVERSE(group) = [X Y Z W]
            uint group = Unsafe.Add(ref src, index);
            Unsafe.Add(ref dst, index) = BinaryPrimitives.ReverseEndianness(group);

            index++;
        }
    }
}
|||
|
|||
/// <summary>
/// <see cref="IShuffle4"/> that rotates every 32 bit group right by 8 bits.
/// </summary>
internal readonly struct YZWXShuffle4 : IShuffle4
{
    /// <summary>
    /// Forwards to <see cref="HwIntrinsics"/> <c>Shuffle4Reduce</c> using the <c>MMShuffle0321</c> control.
    /// </summary>
    /// <param name="source">The source span of bytes, passed by reference.</param>
    /// <param name="destination">The destination span of bytes, passed by reference.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
    {
        HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle0321);
    }

    /// <summary>
    /// Scalar shuffle processing <c>source.Length / 4</c> unsigned 32 bit groups.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="destination">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        uint groupCount = (uint)source.Length / 4;
        ref uint src = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
        ref uint dst = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(destination));

        nuint index = 0;
        while (index < groupCount)
        {
            // Components listed from most to least significant byte:
            // group          = [W Z Y X]
            // ROTR(8, group) = [X W Z Y]
            uint group = Unsafe.Add(ref src, index);
            Unsafe.Add(ref dst, index) = BitOperations.RotateRight(group, 8);

            index++;
        }
    }
}
|||
|
|||
/// <summary>
/// <see cref="IShuffle4"/> that swaps the lowest and third-lowest byte of every 32 bit group
/// while leaving the other two bytes in place.
/// </summary>
internal readonly struct ZYXWShuffle4 : IShuffle4
{
    /// <summary>
    /// Forwards to <see cref="HwIntrinsics"/> <c>Shuffle4Reduce</c> using the <c>MMShuffle3012</c> control.
    /// </summary>
    /// <param name="source">The source span of bytes, passed by reference.</param>
    /// <param name="destination">The destination span of bytes, passed by reference.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
    {
        HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle3012);
    }

    /// <summary>
    /// Scalar shuffle processing <c>source.Length / 4</c> unsigned 32 bit groups.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="destination">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        uint groupCount = (uint)source.Length / 4;
        ref uint src = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
        ref uint dst = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(destination));

        nuint index = 0;
        while (index < groupCount)
        {
            uint group = Unsafe.Add(ref src, index);

            // Components listed from most to least significant byte:
            // group                = [W Z Y X]
            // kept                 = [W 0 Y 0]
            // swapped (before rot) = [0 Z 0 X]
            // ROTL(16, swapped)    = [0 X 0 Z]
            // kept | swapped       = [W X Y Z]
            uint kept = group & 0xFF00FF00;
            uint swapped = BitOperations.RotateLeft(group & 0x00FF00FF, 16);

            // The two halves occupy disjoint bytes, so OR-ing them equals adding them.
            Unsafe.Add(ref dst, index) = kept | swapped;

            index++;
        }
    }
}
|||
|
|||
/// <summary>
/// <see cref="IShuffle4"/> that swaps the highest and second-lowest byte of every 32 bit group
/// while leaving the other two bytes in place.
/// </summary>
internal readonly struct XWZYShuffle4 : IShuffle4
{
    /// <summary>
    /// Forwards to <see cref="HwIntrinsics"/> <c>Shuffle4Reduce</c> using the <c>MMShuffle1230</c> control.
    /// </summary>
    /// <param name="source">The source span of bytes, passed by reference.</param>
    /// <param name="destination">The destination span of bytes, passed by reference.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
    {
        HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle1230);
    }

    /// <summary>
    /// Scalar shuffle processing <c>source.Length / 4</c> unsigned 32 bit groups.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="destination">The destination span of bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
    {
        uint groupCount = (uint)source.Length / 4;
        ref uint src = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
        ref uint dst = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(destination));

        nuint index = 0;
        while (index < groupCount)
        {
            uint group = Unsafe.Add(ref src, index);

            // Components listed from most to least significant byte:
            // group                = [W Z Y X]
            // kept                 = [0 Z 0 X]
            // swapped (before rot) = [W 0 Y 0]
            // ROTL(16, swapped)    = [Y 0 W 0]
            // kept | swapped       = [Y Z W X]
            uint kept = group & 0x00FF00FF;
            uint swapped = BitOperations.RotateLeft(group & 0xFF00FF00, 16);

            // The two halves occupy disjoint bytes, so OR-ing them equals adding them.
            Unsafe.Add(ref dst, index) = kept | swapped;

            index++;
        }
    }
}
|||
@ -0,0 +1,78 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
namespace SixLabors.ImageSharp; |
|||
|
|||
internal static partial class SimdUtils
{
    /// <summary>
    /// Converts every <see cref="byte"/> of the input into a <see cref="float"/> normalized into [0..1].
    /// <paramref name="source"/> should be of the same size as <paramref name="destination"/>,
    /// but there are no restrictions on the span's length.
    /// </summary>
    /// <param name="source">The source span of bytes</param>
    /// <param name="destination">The destination span of floats</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> destination)
    {
        DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");

        // The reduce step receives both spans by reference; whatever is left in them
        // afterwards is converted one element at a time.
        HwIntrinsics.ByteToNormalizedFloatReduce(ref source, ref destination);

        if (!source.IsEmpty)
        {
            ConvertByteToNormalizedFloatRemainder(source, destination);
        }
    }

    /// <summary>
    /// Converts every <see cref="float"/> normalized into [0..1] from 'source' into a <see cref="byte"/> in 'destination'.
    /// The values are scaled up into [0-255] and rounded, overflows are clamped.
    /// <paramref name="source"/> should be of the same size as <paramref name="destination"/>,
    /// but there are no restrictions on the span's length.
    /// </summary>
    /// <param name="source">The source span of floats</param>
    /// <param name="destination">The destination span of bytes</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal static void NormalizedFloatToByteSaturate(ReadOnlySpan<float> source, Span<byte> destination)
    {
        DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");

        // The reduce step receives both spans by reference; whatever is left in them
        // afterwards is converted one element at a time.
        HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref destination);

        if (!source.IsEmpty)
        {
            ConvertNormalizedFloatToByteRemainder(source, destination);
        }
    }

    /// <summary>
    /// Scalar conversion of bytes to floats, dividing each value by 255.
    /// </summary>
    /// <param name="source">The source span of bytes</param>
    /// <param name="destination">The destination span of floats</param>
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan<byte> source, Span<float> destination)
    {
        ref byte src = ref MemoryMarshal.GetReference(source);
        ref float dst = ref MemoryMarshal.GetReference(destination);
        int length = source.Length;

        int index = 0;
        while (index < length)
        {
            byte value = Unsafe.Add(ref src, (uint)index);
            Unsafe.Add(ref dst, (uint)index) = value / 255f;
            index++;
        }
    }

    /// <summary>
    /// Scalar conversion of normalized floats to bytes using <see cref="ConvertToByte"/>.
    /// </summary>
    /// <param name="source">The source span of floats</param>
    /// <param name="destination">The destination span of bytes</param>
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static void ConvertNormalizedFloatToByteRemainder(ReadOnlySpan<float> source, Span<byte> destination)
    {
        ref float src = ref MemoryMarshal.GetReference(source);
        ref byte dst = ref MemoryMarshal.GetReference(destination);
        int length = source.Length;

        int index = 0;
        while (index < length)
        {
            float value = Unsafe.Add(ref src, (uint)index);
            Unsafe.Add(ref dst, (uint)index) = ConvertToByte(value);
            index++;
        }
    }

    /// <summary>
    /// Scales <paramref name="f"/> by 255, adds 0.5, clamps the result into [0..255] and casts it to <see cref="byte"/>.
    /// </summary>
    /// <param name="f">The value to convert.</param>
    /// <returns>The converted <see cref="byte"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static byte ConvertToByte(float f)
    {
        return (byte)Numerics.Clamp((f * 255f) + 0.5f, 0, 255f);
    }
}
|||
@ -1,182 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
// ReSharper disable MemberHidesStaticFromOuterClass
|
|||
namespace SixLabors.ImageSharp; |
|||
|
|||
internal static partial class SimdUtils
{
    /// <summary>
    /// Implementation methods based on newer <see cref="Vector{T}"/> API-s (Vector.Widen, Vector.Narrow, Vector.ConvertTo*).
    /// Only accelerated on RyuJIT having dotnet/coreclr#10662 merged (.NET Core 2.1+ .NET 4.7.2+)
    /// See:
    /// https://github.com/dotnet/coreclr/pull/10662
    /// API Proposal:
    /// https://github.com/dotnet/corefx/issues/15957
    /// </summary>
    public static class ExtendedIntrinsics
    {
        /// <summary>
        /// Gets a value indicating whether <see cref="Vector"/> operations are hardware accelerated.
        /// </summary>
        public static bool IsAvailable { get; } = Vector.IsHardwareAccelerated;

        /// <summary>
        /// Widens a vector of <see cref="short"/> values and converts the two resulting halves to <see cref="float"/>.
        /// </summary>
        /// <param name="source">The vector of shorts to convert.</param>
        /// <param name="dest1">Receives the conversion of the first widened vector.</param>
        /// <param name="dest2">Receives the conversion of the second widened vector.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static void ConvertToSingle(
            Vector<short> source,
            out Vector<float> dest1,
            out Vector<float> dest2)
        {
            Vector.Widen(source, out Vector<int> first, out Vector<int> second);

            dest1 = Vector.ConvertToSingle(first);
            dest2 = Vector.ConvertToSingle(second);
        }

        /// <summary>
        /// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
        /// Does nothing when <see cref="IsAvailable"/> is false.
        /// </summary>
        /// <param name="source">The source span of bytes; sliced to the unprocessed remainder.</param>
        /// <param name="dest">The destination span of floats; sliced to the unprocessed remainder.</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        internal static void ByteToNormalizedFloatReduce(
            ref ReadOnlySpan<byte> source,
            ref Span<float> dest)
        {
            DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

            if (!IsAvailable)
            {
                return;
            }

            // Largest prefix whose length is a multiple of the byte vector size.
            int vectorizable = source.Length - Numerics.ModuloP2(source.Length, Vector<byte>.Count);
            if (vectorizable <= 0)
            {
                return;
            }

            ByteToNormalizedFloat(source[..vectorizable], dest[..vectorizable]);

            source = source[vectorizable..];
            dest = dest[vectorizable..];
        }

        /// <summary>
        /// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
        /// Does nothing when <see cref="IsAvailable"/> is false.
        /// </summary>
        /// <param name="source">The source span of floats; sliced to the unprocessed remainder.</param>
        /// <param name="dest">The destination span of bytes; sliced to the unprocessed remainder.</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        internal static void NormalizedFloatToByteSaturateReduce(
            ref ReadOnlySpan<float> source,
            ref Span<byte> dest)
        {
            DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

            if (!IsAvailable)
            {
                return;
            }

            // Largest prefix whose length is a multiple of the byte vector size.
            int vectorizable = source.Length - Numerics.ModuloP2(source.Length, Vector<byte>.Count);
            if (vectorizable <= 0)
            {
                return;
            }

            NormalizedFloatToByteSaturate(source[..vectorizable], dest[..vectorizable]);

            source = source[vectorizable..];
            dest = dest[vectorizable..];
        }

        /// <summary>
        /// Implementation <see cref="SimdUtils.ByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
        /// </summary>
        /// <param name="source">The source span of bytes.</param>
        /// <param name="dest">The destination span of floats.</param>
        internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
        {
            VerifySpanInput(source, dest, Vector<byte>.Count);

            ref Vector<byte> srcVectors = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(source));
            ref Vector<float> dstVectors = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dest));
            nuint vectorCount = dest.VectorCount<byte>();

            for (nuint index = 0; index < vectorCount; index++)
            {
                // byte -> ushort -> uint: one byte vector widens into four uint vectors.
                Vector.Widen(Unsafe.Add(ref srcVectors, index), out Vector<ushort> shorts0, out Vector<ushort> shorts1);
                Vector.Widen(shorts0, out Vector<uint> w0, out Vector<uint> w1);
                Vector.Widen(shorts1, out Vector<uint> w2, out Vector<uint> w3);

                // Each byte vector produces four consecutive float vectors.
                ref Vector<float> target = ref Unsafe.Add(ref dstVectors, index * 4);
                target = ConvertToSingle(w0);
                Unsafe.Add(ref target, 1) = ConvertToSingle(w1);
                Unsafe.Add(ref target, 2) = ConvertToSingle(w2);
                Unsafe.Add(ref target, 3) = ConvertToSingle(w3);
            }
        }

        /// <summary>
        /// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
        /// </summary>
        /// <param name="source">The source span of floats.</param>
        /// <param name="dest">The destination span of bytes.</param>
        internal static void NormalizedFloatToByteSaturate(
            ReadOnlySpan<float> source,
            Span<byte> dest)
        {
            VerifySpanInput(source, dest, Vector<byte>.Count);

            ref Vector<float> srcVectors = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
            ref Vector<byte> dstVectors = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(dest));
            nuint vectorCount = dest.VectorCount<byte>();

            for (nuint index = 0; index < vectorCount; index++)
            {
                // Four consecutive float vectors narrow down into a single byte vector.
                ref Vector<float> block = ref Unsafe.Add(ref srcVectors, index * 4);

                Vector<uint> w0 = ConvertToUInt32(block);
                Vector<uint> w1 = ConvertToUInt32(Unsafe.Add(ref block, 1));
                Vector<uint> w2 = ConvertToUInt32(Unsafe.Add(ref block, 2));
                Vector<uint> w3 = ConvertToUInt32(Unsafe.Add(ref block, 3));

                Vector<ushort> shorts0 = Vector.Narrow(w0, w1);
                Vector<ushort> shorts1 = Vector.Narrow(w2, w3);

                Unsafe.Add(ref dstVectors, index) = Vector.Narrow(shorts0, shorts1);
            }
        }

        /// <summary>
        /// Scales the vector by 255, adds 0.5, clamps into [0..255] and converts the result to unsigned integers.
        /// </summary>
        /// <param name="vf">The vector of floats to convert.</param>
        /// <returns>The converted <see cref="Vector{UInt32}"/>.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static Vector<uint> ConvertToUInt32(Vector<float> vf)
        {
            Vector<float> maxBytes = new Vector<float>(255f);

            Vector<float> rounded = (vf * maxBytes) + new Vector<float>(0.5f);
            Vector<float> clamped = Vector.Min(Vector.Max(rounded, Vector<float>.Zero), maxBytes);

            return Vector.AsVectorUInt32(Vector.ConvertToInt32(clamped));
        }

        /// <summary>
        /// Converts the vector to floats and multiplies it by 1/255.
        /// </summary>
        /// <param name="u">The vector of unsigned integers to convert.</param>
        /// <returns>The converted <see cref="Vector{Single}"/>.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static Vector<float> ConvertToSingle(Vector<uint> u)
        {
            Vector<float> converted = Vector.ConvertToSingle(Vector.AsVectorInt32(u));
            return converted * new Vector<float>(1f / 255f);
        }
    }
}
|||
@ -1,144 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
// ReSharper disable MemberHidesStaticFromOuterClass
|
|||
namespace SixLabors.ImageSharp; |
|||
|
|||
internal static partial class SimdUtils
{
    /// <summary>
    /// Fallback implementation based on <see cref="Vector4"/> (128bit).
    /// For <see cref="Vector4"/>, efficient software fallback implementations are present,
    /// and we hope that even mono's JIT is able to emit SIMD instructions for that type :P
    /// </summary>
    public static class FallbackIntrinsics128
    {
        /// <summary>
        /// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
        /// </summary>
        /// <param name="source">The source span of bytes; sliced to the unprocessed remainder.</param>
        /// <param name="dest">The destination span of floats; sliced to the unprocessed remainder.</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        internal static void ByteToNormalizedFloatReduce(
            ref ReadOnlySpan<byte> source,
            ref Span<float> dest)
        {
            DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

            // Largest prefix whose length is a multiple of four.
            int vectorizable = source.Length - Numerics.Modulo4(source.Length);
            if (vectorizable <= 0)
            {
                return;
            }

            ByteToNormalizedFloat(source[..vectorizable], dest[..vectorizable]);

            source = source[vectorizable..];
            dest = dest[vectorizable..];
        }

        /// <summary>
        /// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
        /// </summary>
        /// <param name="source">The source span of floats; sliced to the unprocessed remainder.</param>
        /// <param name="dest">The destination span of bytes; sliced to the unprocessed remainder.</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        internal static void NormalizedFloatToByteSaturateReduce(
            ref ReadOnlySpan<float> source,
            ref Span<byte> dest)
        {
            DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

            // Largest prefix whose length is a multiple of four.
            int vectorizable = source.Length - Numerics.Modulo4(source.Length);
            if (vectorizable <= 0)
            {
                return;
            }

            NormalizedFloatToByteSaturate(source[..vectorizable], dest[..vectorizable]);

            source = source[vectorizable..];
            dest = dest[vectorizable..];
        }

        /// <summary>
        /// Implementation of <see cref="SimdUtils.ByteToNormalizedFloat"/> using <see cref="Vector4"/>.
        /// </summary>
        /// <param name="source">The source span of bytes.</param>
        /// <param name="dest">The destination span of floats.</param>
        [MethodImpl(InliningOptions.ColdPath)]
        internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
        {
            VerifySpanInput(source, dest, 4);

            uint groupCount = (uint)dest.Length / 4;
            if (groupCount == 0)
            {
                return;
            }

            const float scale = 1f / 255f;

            ref ByteVector4 src = ref Unsafe.As<byte, ByteVector4>(ref MemoryMarshal.GetReference(source));
            ref Vector4 dst = ref Unsafe.As<float, Vector4>(ref MemoryMarshal.GetReference(dest));

            for (nuint index = 0; index < groupCount; index++)
            {
                // Four bytes are expanded into one Vector4 and scaled by 1/255.
                ref ByteVector4 bytes = ref Unsafe.Add(ref src, index);
                Unsafe.Add(ref dst, index) = new Vector4(bytes.X, bytes.Y, bytes.Z, bytes.W) * scale;
            }
        }

        /// <summary>
        /// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/> using <see cref="Vector4"/>.
        /// </summary>
        /// <param name="source">The source span of floats.</param>
        /// <param name="dest">The destination span of bytes.</param>
        [MethodImpl(InliningOptions.ColdPath)]
        internal static void NormalizedFloatToByteSaturate(
            ReadOnlySpan<float> source,
            Span<byte> dest)
        {
            VerifySpanInput(source, dest, 4);

            uint groupCount = (uint)source.Length / 4;
            if (groupCount == 0)
            {
                return;
            }

            Vector4 maxBytes = new Vector4(255f);
            Vector4 half = new Vector4(0.5f);

            ref Vector4 src = ref Unsafe.As<float, Vector4>(ref MemoryMarshal.GetReference(source));
            ref ByteVector4 dst = ref Unsafe.As<byte, ByteVector4>(ref MemoryMarshal.GetReference(dest));

            for (nuint index = 0; index < groupCount; index++)
            {
                // Scale to [0..255], add 0.5 and clamp before the components are cast to bytes.
                Vector4 scaled = (Unsafe.Add(ref src, index) * maxBytes) + half;
                Vector4 clamped = Numerics.Clamp(scaled, Vector4.Zero, maxBytes);

                ref ByteVector4 target = ref Unsafe.Add(ref dst, index);
                target.X = (byte)clamped.X;
                target.Y = (byte)clamped.Y;
                target.Z = (byte)clamped.Z;
                target.W = (byte)clamped.W;
            }
        }

        /// <summary>
        /// Four sequentially laid out bytes, used to reinterpret byte spans in groups of four.
        /// </summary>
        [StructLayout(LayoutKind.Sequential)]
        private struct ByteVector4
        {
            public byte X;

            public byte Y;

            public byte Z;

            public byte W;
        }
    }
}
|||
File diff suppressed because it is too large
@ -0,0 +1,250 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Diagnostics; |
|||
using System.Diagnostics.CodeAnalysis; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.Arm; |
|||
using System.Runtime.Intrinsics.X86; |
|||
|
|||
namespace SixLabors.ImageSharp.Common.Helpers; |
|||
|
|||
/// <summary>
|
|||
/// Defines utility methods for <see cref="Vector128{T}"/> that have either:
|
|||
/// <list type="number">
|
|||
/// <item>Not yet been normalized in the runtime.</item>
|
|||
/// <item>Produce codegen that is poorly optimized by the runtime.</item>
|
|||
/// </list>
|
|||
/// Should only be used if the intrinsics are available.
|
|||
/// </summary>
|
|||
internal static class Vector128Utilities |
|||
{ |
|||
/// <summary>
/// Gets a value indicating whether shuffling a <see cref="Vector128{Single}"/> with a control byte
/// is supported. This is the case when <see cref="Sse"/> is supported.
/// </summary>
public static bool SupportsShuffleFloat
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return Sse.IsSupported;
    }
}
|||
|
|||
/// <summary>
/// Gets a value indicating whether shuffling a <see cref="Vector128{Byte}"/> with a vector of indices
/// is supported. This is the case when either <see cref="Ssse3"/> or <see cref="AdvSimd.Arm64"/> is supported.
/// </summary>
public static bool SupportsShuffleByte
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return Ssse3.IsSupported || AdvSimd.Arm64.IsSupported;
    }
}
|||
|
|||
/// <summary>
/// Gets a value indicating whether right align operations are supported.
/// This is the case when either <see cref="Ssse3"/> or <see cref="AdvSimd"/> is supported.
/// </summary>
public static bool SupportsRightAlign
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return Ssse3.IsSupported || AdvSimd.IsSupported;
    }
}
|||
|
|||
/// <summary>
/// Gets a value indicating whether right or left byte shift operations are supported.
/// This is the case when either <see cref="Sse2"/> or <see cref="AdvSimd"/> is supported.
/// </summary>
public static bool SupportsShiftByte
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return Sse2.IsSupported || AdvSimd.IsSupported;
    }
}
|||
|
|||
/// <summary>
|
|||
/// Creates a new vector by selecting values from an input vector using the control.
|
|||
/// </summary>
|
|||
/// <param name="vector">The input vector from which values are selected.</param>
|
|||
/// <param name="control">The shuffle control byte.</param>
|
|||
/// <returns>The <see cref="Vector128{Single}"/>.</returns>
|
|||
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|||
public static Vector128<float> Shuffle(Vector128<float> vector, [ConstantExpected] byte control) |
|||
{ |
|||
if (Sse.IsSupported) |
|||
{ |
|||
return Sse.Shuffle(vector, vector, control); |
|||
} |
|||
|
|||
ThrowUnreachableException(); |
|||
return default; |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Creates a new vector by selecting values from an input vector using a set of indices.
|
|||
/// </summary>
|
|||
/// <param name="vector">
|
|||
/// The input vector from which values are selected.</param>
|
|||
/// <param name="indices">
|
|||
/// The per-element indices used to select a value from <paramref name="vector" />.
|
|||
/// </param>
|
|||
/// <returns>
|
|||
/// A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.
|
|||
/// </returns>
|
|||
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|||
public static Vector128<byte> Shuffle(Vector128<byte> vector, Vector128<byte> indices) |
|||
{ |
|||
if (Ssse3.IsSupported) |
|||
{ |
|||
return Ssse3.Shuffle(vector, indices); |
|||
} |
|||
|
|||
if (AdvSimd.Arm64.IsSupported) |
|||
{ |
|||
return AdvSimd.Arm64.VectorTableLookup(vector, indices); |
|||
} |
|||
|
|||
ThrowUnreachableException(); |
|||
return default; |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Shifts a 128-bit value right by a specified number of bytes while shifting in zeros.
|
|||
/// </summary>
|
|||
/// <param name="value">The value to shift.</param>
|
|||
/// <param name="numBytes">The number of bytes to shift by.</param>
|
|||
/// <returns>The <see cref="Vector128{Byte}"/>.</returns>
|
|||
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|||
public static Vector128<byte> ShiftRightBytesInVector(Vector128<byte> value, [ConstantExpected(Max = (byte)15)] byte numBytes) |
|||
{ |
|||
if (Sse2.IsSupported) |
|||
{ |
|||
return Sse2.ShiftRightLogical128BitLane(value, numBytes); |
|||
} |
|||
|
|||
if (AdvSimd.IsSupported) |
|||
{ |
|||
return AdvSimd.ExtractVector128(value, Vector128<byte>.Zero, numBytes); |
|||
} |
|||
|
|||
ThrowUnreachableException(); |
|||
return default; |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Shifts a 128-bit value left by a specified number of bytes while shifting in zeros.
|
|||
/// </summary>
|
|||
/// <param name="value">The value to shift.</param>
|
|||
/// <param name="numBytes">The number of bytes to shift by.</param>
|
|||
/// <returns>The <see cref="Vector128{Byte}"/>.</returns>
|
|||
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|||
public static Vector128<byte> ShiftLeftBytesInVector(Vector128<byte> value, [ConstantExpected(Max = (byte)15)] byte numBytes) |
|||
{ |
|||
if (Sse2.IsSupported) |
|||
{ |
|||
return Sse2.ShiftLeftLogical128BitLane(value, numBytes); |
|||
} |
|||
|
|||
if (AdvSimd.IsSupported) |
|||
{ |
|||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
|||
return AdvSimd.ExtractVector128(Vector128<byte>.Zero, value, (byte)(Vector128<byte>.Count - numBytes)); |
|||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
|||
} |
|||
|
|||
ThrowUnreachableException(); |
|||
return default; |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Right aligns elements of two source 128-bit values depending on bits in a mask.
|
|||
/// </summary>
|
|||
/// <param name="left">The left hand source vector.</param>
|
|||
/// <param name="right">The right hand source vector.</param>
|
|||
/// <param name="mask">An 8-bit mask used for the operation.</param>
|
|||
/// <returns>The <see cref="Vector128{Byte}"/>.</returns>
|
|||
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|||
public static Vector128<byte> AlignRight(Vector128<byte> left, Vector128<byte> right, [ConstantExpected(Max = (byte)15)] byte mask) |
|||
{ |
|||
if (Ssse3.IsSupported) |
|||
{ |
|||
return Ssse3.AlignRight(left, right, mask); |
|||
} |
|||
|
|||
if (AdvSimd.IsSupported) |
|||
{ |
|||
return AdvSimd.ExtractVector128(right, left, mask); |
|||
} |
|||
|
|||
ThrowUnreachableException(); |
|||
return default; |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Performs a conversion from a 128-bit vector of 4 single-precision floating-point values to a 128-bit vector of 4 signed 32-bit integer values.
|
|||
/// Rounding is equivalent to <see cref="MidpointRounding.ToEven"/>.
|
|||
/// </summary>
|
|||
/// <param name="vector">The value to convert.</param>
|
|||
/// <returns>The <see cref="Vector128{Int32}"/>.</returns>
|
|||
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|||
public static Vector128<int> ConvertToInt32RoundToEven(Vector128<float> vector) |
|||
{ |
|||
if (Sse2.IsSupported) |
|||
{ |
|||
return Sse2.ConvertToVector128Int32(vector); |
|||
} |
|||
|
|||
if (AdvSimd.IsSupported) |
|||
{ |
|||
return AdvSimd.ConvertToInt32RoundToEven(vector); |
|||
} |
|||
|
|||
Vector128<float> sign = vector & Vector128.Create(-0.0f); |
|||
Vector128<float> val_2p23_f32 = sign | Vector128.Create(8388608.0f); |
|||
|
|||
val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32; |
|||
return Vector128.ConvertToInt32(val_2p23_f32 | sign); |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Packs signed 16-bit integers to unsigned 8-bit integers and saturates.
|
|||
/// </summary>
|
|||
/// <param name="left">The left hand source vector.</param>
|
|||
/// <param name="right">The right hand source vector.</param>
|
|||
/// <returns>The <see cref="Vector128{Int16}"/>.</returns>
|
|||
public static Vector128<byte> PackUnsignedSaturate(Vector128<short> left, Vector128<short> right) |
|||
{ |
|||
if (Sse2.IsSupported) |
|||
{ |
|||
return Sse2.PackUnsignedSaturate(left, right); |
|||
} |
|||
|
|||
if (AdvSimd.IsSupported) |
|||
{ |
|||
return AdvSimd.ExtractNarrowingSaturateUnsignedUpper(AdvSimd.ExtractNarrowingSaturateUnsignedLower(left), right); |
|||
} |
|||
|
|||
ThrowUnreachableException(); |
|||
return default; |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Packs signed 32-bit integers to signed 16-bit integers and saturates.
|
|||
/// </summary>
|
|||
/// <param name="left">The left hand source vector.</param>
|
|||
/// <param name="right">The right hand source vector.</param>
|
|||
/// <returns>The <see cref="Vector128{Int16}"/>.</returns>
|
|||
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|||
public static Vector128<short> PackSignedSaturate(Vector128<int> left, Vector128<int> right) |
|||
{ |
|||
if (Sse2.IsSupported) |
|||
{ |
|||
return Sse2.PackSignedSaturate(left, right); |
|||
} |
|||
|
|||
if (AdvSimd.IsSupported) |
|||
{ |
|||
return AdvSimd.ExtractNarrowingSaturateUpper(AdvSimd.ExtractNarrowingSaturateLower(left), right); |
|||
} |
|||
|
|||
ThrowUnreachableException(); |
|||
return default; |
|||
} |
|||
|
|||
[DoesNotReturn] |
|||
private static void ThrowUnreachableException() => throw new UnreachableException(); |
|||
} |
|||
@ -0,0 +1,115 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Diagnostics; |
|||
using System.Diagnostics.CodeAnalysis; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
|
|||
namespace SixLabors.ImageSharp.Common.Helpers; |
|||
|
|||
/// <summary>
/// Helper operations for <see cref="Vector256{T}"/> covering cases that either:
/// <list type="number">
/// <item>Have not yet been normalized in the runtime.</item>
/// <item>Produce codegen that is poorly optimized by the runtime.</item>
/// </list>
/// Should only be used if the intrinsics are available.
/// </summary>
internal static class Vector256Utilities
{
    /// <summary>
    /// Gets a value indicating whether shuffle float operations are supported.
    /// </summary>
    public static bool SupportsShuffleFloat
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            return Avx.IsSupported || Sse.IsSupported;
        }
    }

    /// <summary>
    /// Gets a value indicating whether shuffle byte operations are supported.
    /// </summary>
    public static bool SupportsShuffleByte
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            return Avx2.IsSupported;
        }
    }

    /// <summary>
    /// Creates a new vector by selecting values from an input vector using the control.
    /// </summary>
    /// <param name="vector">The input vector from which values are selected.</param>
    /// <param name="control">The shuffle control byte.</param>
    /// <returns>The <see cref="Vector256{Single}"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector256<float> Shuffle(Vector256<float> vector, [ConstantExpected] byte control)
    {
        if (Avx.IsSupported)
        {
            return Avx.Shuffle(vector, vector, control);
        }

        if (!Sse.IsSupported)
        {
            ThrowUnreachableException();
            return default;
        }

        // No AVX: apply the same control to each 128-bit half and stitch the halves back together.
        Vector128<float> lowHalf = vector.GetLower();
        Vector128<float> highHalf = vector.GetUpper();
        Vector128<float> shuffledLow = Sse.Shuffle(lowHalf, lowHalf, control);
        Vector128<float> shuffledHigh = Sse.Shuffle(highHalf, highHalf, control);
        return Vector256.Create(shuffledLow, shuffledHigh);
    }

    /// <summary>
    /// Creates a new vector by selecting values from an input vector using a set of indices.
    /// </summary>
    /// <param name="vector">The input vector from which values are selected.</param>
    /// <param name="indices">
    /// The per-element indices used to select a value from <paramref name="vector" />.
    /// </param>
    /// <returns>The <see cref="Vector256{Byte}"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector256<byte> Shuffle(Vector256<byte> vector, Vector256<byte> indices)
    {
        if (!Avx2.IsSupported)
        {
            ThrowUnreachableException();
            return default;
        }

        return Avx2.Shuffle(vector, indices);
    }

    /// <summary>
    /// Performs a conversion from a 256-bit vector of 8 single-precision floating-point values to a 256-bit vector of 8 signed 32-bit integer values.
    /// Rounding is equivalent to <see cref="MidpointRounding.ToEven"/>.
    /// </summary>
    /// <param name="vector">The value to convert.</param>
    /// <returns>The <see cref="Vector256{Int32}"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector256<int> ConvertToInt32RoundToEven(Vector256<float> vector)
    {
        if (Avx.IsSupported)
        {
            return Avx.ConvertToVector256Int32(vector);
        }

        if (Sse2.IsSupported)
        {
            // Convert each 128-bit half separately.
            return Vector256.Create(
                Sse2.ConvertToVector128Int32(vector.GetLower()),
                Sse2.ConvertToVector128Int32(vector.GetUpper()));
        }

        // Software fallback: add and subtract a signed 2^23 so the result is a whole number,
        // then put the sign bit back before converting.
        Vector256<float> signBits = vector & Vector256.Create(-0.0f);
        Vector256<float> signedBias = signBits | Vector256.Create(8388608.0f);
        Vector256<float> wholeNumber = (vector + signedBias) - signedBias;
        return Vector256.ConvertToInt32(wholeNumber | signBits);
    }

    [DoesNotReturn]
    private static void ThrowUnreachableException()
    {
        throw new UnreachableException();
    }
}
|||
@ -0,0 +1,115 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Diagnostics; |
|||
using System.Diagnostics.CodeAnalysis; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
|
|||
namespace SixLabors.ImageSharp.Common.Helpers; |
|||
|
|||
/// <summary>
/// Helper operations for <see cref="Vector512{T}"/> covering cases that either:
/// <list type="number">
/// <item>Have not yet been normalized in the runtime.</item>
/// <item>Produce codegen that is poorly optimized by the runtime.</item>
/// </list>
/// Should only be used if the intrinsics are available.
/// </summary>
internal static class Vector512Utilities
{
    /// <summary>
    /// Gets a value indicating whether shuffle float operations are supported.
    /// </summary>
    public static bool SupportsShuffleFloat
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            return Avx512F.IsSupported || Avx.IsSupported;
        }
    }

    /// <summary>
    /// Gets a value indicating whether shuffle byte operations are supported.
    /// </summary>
    public static bool SupportsShuffleByte
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            return Avx512BW.IsSupported;
        }
    }

    /// <summary>
    /// Creates a new vector by selecting values from an input vector using the control.
    /// </summary>
    /// <param name="vector">The input vector from which values are selected.</param>
    /// <param name="control">The shuffle control byte.</param>
    /// <returns>The <see cref="Vector512{Single}"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector512<float> Shuffle(Vector512<float> vector, [ConstantExpected] byte control)
    {
        if (Avx512F.IsSupported)
        {
            return Avx512F.Shuffle(vector, vector, control);
        }

        if (!Avx.IsSupported)
        {
            ThrowUnreachableException();
            return default;
        }

        // No AVX-512: shuffle each 256-bit half with the same control and recombine.
        Vector256<float> bottom = vector.GetLower();
        Vector256<float> top = vector.GetUpper();
        Vector256<float> shuffledBottom = Avx.Shuffle(bottom, bottom, control);
        Vector256<float> shuffledTop = Avx.Shuffle(top, top, control);
        return Vector512.Create(shuffledBottom, shuffledTop);
    }

    /// <summary>
    /// Creates a new vector by selecting values from an input vector using a set of indices.
    /// </summary>
    /// <param name="vector">The input vector from which values are selected.</param>
    /// <param name="indices">
    /// The per-element indices used to select a value from <paramref name="vector" />.
    /// </param>
    /// <returns>The <see cref="Vector512{Byte}"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector512<byte> Shuffle(Vector512<byte> vector, Vector512<byte> indices)
    {
        if (!Avx512BW.IsSupported)
        {
            ThrowUnreachableException();
            return default;
        }

        return Avx512BW.Shuffle(vector, indices);
    }

    /// <summary>
    /// Performs a conversion from a 512-bit vector of 16 single-precision floating-point values to a 512-bit vector of 16 signed 32-bit integer values.
    /// Rounding is equivalent to <see cref="MidpointRounding.ToEven"/>.
    /// </summary>
    /// <param name="vector">The value to convert.</param>
    /// <returns>The <see cref="Vector512{Int32}"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector512<int> ConvertToInt32RoundToEven(Vector512<float> vector)
    {
        if (Avx512F.IsSupported)
        {
            return Avx512F.ConvertToVector512Int32(vector);
        }

        if (Avx.IsSupported)
        {
            // Convert each 256-bit half separately.
            return Vector512.Create(
                Avx.ConvertToVector256Int32(vector.GetLower()),
                Avx.ConvertToVector256Int32(vector.GetUpper()));
        }

        // Software fallback: add and subtract a signed 2^23 so the result is a whole number,
        // then put the sign bit back before converting.
        Vector512<float> signBits = vector & Vector512.Create(-0.0f);
        Vector512<float> signedBias = signBits | Vector512.Create(8388608.0f);
        Vector512<float> wholeNumber = (vector + signedBias) - signedBias;
        return Vector512.ConvertToInt32(wholeNumber | signBits);
    }

    [DoesNotReturn]
    private static void ThrowUnreachableException()
    {
        throw new UnreachableException();
    }
}
|||
@ -1,69 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Compression.Zlib; |
|||
|
|||
/// <content>
|
|||
/// Contains precalculated tables for scalar calculations.
|
|||
/// </content>
|
|||
internal static partial class Crc32 |
|||
{ |
|||
/// <summary>
|
|||
/// The table of all possible eight bit values for fast scalar lookup.
/// Holds 256 entries, one per byte value. The scalar routine indexes it with the low
/// eight bits of the running crc XORed with the next input byte.
|
|||
/// </summary>
|
|||
private static readonly uint[] CrcTable = |
|||
{ |
|||
0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, |
|||
0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, |
|||
0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, |
|||
0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, |
|||
0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856, |
|||
0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, |
|||
0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, |
|||
0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, |
|||
0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, |
|||
0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC, 0x51DE003A, |
|||
0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, |
|||
0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, |
|||
0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, |
|||
0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, |
|||
0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E, |
|||
0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, |
|||
0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, |
|||
0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, |
|||
0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, |
|||
0xFBD44C65, 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, |
|||
0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, |
|||
0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, |
|||
0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010, |
|||
0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, |
|||
0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, |
|||
0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, |
|||
0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, |
|||
0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, |
|||
0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344, |
|||
0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, |
|||
0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, |
|||
0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, |
|||
0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, |
|||
0xA6BC5767, 0x3FB506DD, 0x48B2364B, 0xD80D2BDA, 0xAF0A1B4C, |
|||
0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, |
|||
0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, |
|||
0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, |
|||
0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, |
|||
0x2CD99E8B, 0x5BDEAE1D, 0x9B64C2B0, 0xEC63F226, 0x756AA39C, |
|||
0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, |
|||
0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, |
|||
0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, |
|||
0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, |
|||
0x18B74777, 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, |
|||
0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278, |
|||
0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, |
|||
0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, |
|||
0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, |
|||
0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, |
|||
0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, |
|||
0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, |
|||
0x2D02EF8D |
|||
}; |
|||
} |
|||
@ -1,308 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using ArmCrc32 = System.Runtime.Intrinsics.Arm.Crc32; |
|||
|
|||
namespace SixLabors.ImageSharp.Compression.Zlib; |
|||
|
|||
/// <summary>
/// Computes the 32 bit Cyclic Redundancy Check (CRC) checksum of a buffer
/// according to the IEEE 802.3 specification.
/// </summary>
internal static partial class Crc32
{
    /// <summary>
    /// The seed used when a checksum calculation starts from scratch.
    /// </summary>
    public const uint SeedValue = 0U;

    // The SSE routine unconditionally consumes one 64 byte block, so shorter buffers must not reach it.
    private const int MinBufferSize = 64;

    // Masks off the bytes that do not fill a whole 16 byte chunk.
    private const int ChunksizeMask = 15;

    // Definitions of the bit-reflected domain constants k1, k2, k3, etc and
    // the CRC32+Barrett polynomials given at the end of the paper.
    private static readonly ulong[] K05Poly =
    {
        0x0154442bd4, 0x01c6e41596, // k1, k2
        0x01751997d0, 0x00ccaa009e, // k3, k4
        0x0163cd6124, 0x0000000000, // k5, k0
        0x01db710641, 0x01f7011641 // polynomial
    };

    /// <summary>
    /// Computes the CRC checksum of the given bytes, starting from <see cref="SeedValue"/>.
    /// </summary>
    /// <param name="buffer">The readonly span of bytes.</param>
    /// <returns>The <see cref="uint"/>.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static uint Calculate(ReadOnlySpan<byte> buffer)
    {
        return Calculate(SeedValue, buffer);
    }

    /// <summary>
    /// Computes the CRC checksum of the given bytes, continuing from an existing checksum.
    /// </summary>
    /// <param name="crc">The input CRC value.</param>
    /// <param name="buffer">The readonly span of bytes.</param>
    /// <returns>The <see cref="uint"/>. An empty buffer returns <paramref name="crc"/> unchanged.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static uint Calculate(uint crc, ReadOnlySpan<byte> buffer)
    {
        if (buffer.IsEmpty)
        {
            return crc;
        }

        // Every implementation works on the bitwise complement of the checksum.
        uint state = ~crc;

        if (Sse41.IsSupported && Pclmulqdq.IsSupported && buffer.Length >= MinBufferSize)
        {
            state = CalculateSse(state, buffer);
        }
        else if (ArmCrc32.Arm64.IsSupported)
        {
            state = CalculateArm64(state, buffer);
        }
        else if (ArmCrc32.IsSupported)
        {
            state = CalculateArm(state, buffer);
        }
        else
        {
            state = CalculateScalar(state, buffer);
        }

        return ~state;
    }

    // Based on https://github.com/chromium/chromium/blob/master/third_party/zlib/crc32_simd.c
    // The x0..x4 register names mirror that implementation.
    [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)]
    private static unsafe uint CalculateSse(uint crc, ReadOnlySpan<byte> buffer)
    {
        // Only whole 16 byte chunks are folded here; any tail goes through the scalar routine.
        int vectorizedLength = buffer.Length & ~ChunksizeMask;
        int remaining = vectorizedLength;

        fixed (byte* bufferPtr = buffer)
        fixed (ulong* k05PolyPtr = K05Poly)
        {
            byte* src = bufferPtr;

            // There's at least one block of 64.
            Vector128<ulong> x1 = Sse2.LoadVector128((ulong*)(src + 0x00));
            Vector128<ulong> x2 = Sse2.LoadVector128((ulong*)(src + 0x10));
            Vector128<ulong> x3 = Sse2.LoadVector128((ulong*)(src + 0x20));
            Vector128<ulong> x4 = Sse2.LoadVector128((ulong*)(src + 0x30));
            src += 64;
            remaining -= 64;

            // Mix the incoming checksum into the first lane.
            x1 = Sse2.Xor(x1, Sse2.ConvertScalarToVector128UInt32(crc).AsUInt64());

            // k1, k2
            Vector128<ulong> x0 = Sse2.LoadVector128(k05PolyPtr + 0x0);

            // Parallel fold blocks of 64, if any.
            for (; remaining >= 64; remaining -= 64, src += 64)
            {
                x1 = Fold(x1, x0, Sse2.LoadVector128((ulong*)(src + 0x00)));
                x2 = Fold(x2, x0, Sse2.LoadVector128((ulong*)(src + 0x10)));
                x3 = Fold(x3, x0, Sse2.LoadVector128((ulong*)(src + 0x20)));
                x4 = Fold(x4, x0, Sse2.LoadVector128((ulong*)(src + 0x30)));
            }

            // Fold into 128-bits.
            // k3, k4
            x0 = Sse2.LoadVector128(k05PolyPtr + 0x2);
            x1 = Fold(x1, x0, x2);
            x1 = Fold(x1, x0, x3);
            x1 = Fold(x1, x0, x4);

            // Single fold blocks of 16, if any.
            for (; remaining >= 16; remaining -= 16, src += 16)
            {
                x1 = Fold(x1, x0, Sse2.LoadVector128((ulong*)src));
            }

            // Fold 128-bits to 64-bits.
            x2 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x10);
            x3 = Vector128.Create(~0, 0, ~0, 0).AsUInt64(); // _mm_setr_epi32 on x86
            x1 = Sse2.ShiftRightLogical128BitLane(x1, 8);
            x1 = Sse2.Xor(x1, x2);

            // k5, k0
            x0 = Sse2.LoadScalarVector128(k05PolyPtr + 0x4);

            x2 = Sse2.ShiftRightLogical128BitLane(x1, 4);
            x1 = Sse2.And(x1, x3);
            x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
            x1 = Sse2.Xor(x1, x2);

            // Barrett reduce to 32-bits.
            // polynomial
            x0 = Sse2.LoadVector128(k05PolyPtr + 0x6);

            x2 = Sse2.And(x1, x3);
            x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x10);
            x2 = Sse2.And(x2, x3);
            x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x00);
            x1 = Sse2.Xor(x1, x2);

            crc = (uint)Sse41.Extract(x1.AsInt32(), 1);
        }

        // Bytes beyond the last whole 16 byte chunk are finished by the scalar routine.
        return vectorizedLength == buffer.Length
            ? crc
            : CalculateScalar(crc, buffer.Slice(vectorizedLength));

        // One folding step: multiplies both 64-bit halves of 'state' by the matching constant
        // and XORs the two products together with 'data'.
        [MethodImpl(InliningOptions.ShortMethod)]
        static Vector128<ulong> Fold(Vector128<ulong> state, Vector128<ulong> constants, Vector128<ulong> data)
        {
            Vector128<ulong> low = Pclmulqdq.CarrylessMultiply(state, constants, 0x00);
            Vector128<ulong> high = Pclmulqdq.CarrylessMultiply(state, constants, 0x11);
            return Sse2.Xor(Sse2.Xor(high, low), data);
        }
    }

    // Uses the 32-bit Arm CRC32 instructions: bytes until the pointer is 4 byte aligned,
    // then whole words, then the remaining bytes.
    [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)]
    private static unsafe uint CalculateArm(uint crc, ReadOnlySpan<byte> buffer)
    {
        fixed (byte* bufferPtr = buffer)
        {
            byte* cursor = bufferPtr;
            int remaining = buffer.Length;

            // Leading bytes up to the next 4 byte boundary.
            for (; remaining > 0 && ((ulong)cursor & 3) != 0; remaining--)
            {
                crc = ArmCrc32.ComputeCrc32(crc, *cursor++);
            }

            uint* words = (uint*)cursor;

            // Unrolled: eight words per iteration.
            for (; remaining >= 8 * sizeof(uint); remaining -= 8 * sizeof(uint))
            {
                crc = ArmCrc32.ComputeCrc32(crc, words[0]);
                crc = ArmCrc32.ComputeCrc32(crc, words[1]);
                crc = ArmCrc32.ComputeCrc32(crc, words[2]);
                crc = ArmCrc32.ComputeCrc32(crc, words[3]);
                crc = ArmCrc32.ComputeCrc32(crc, words[4]);
                crc = ArmCrc32.ComputeCrc32(crc, words[5]);
                crc = ArmCrc32.ComputeCrc32(crc, words[6]);
                crc = ArmCrc32.ComputeCrc32(crc, words[7]);
                words += 8;
            }

            // Remaining whole words.
            for (; remaining >= sizeof(uint); remaining -= sizeof(uint))
            {
                crc = ArmCrc32.ComputeCrc32(crc, *words++);
            }

            // Trailing bytes.
            cursor = (byte*)words;
            for (; remaining > 0; remaining--)
            {
                crc = ArmCrc32.ComputeCrc32(crc, *cursor++);
            }

            return crc;
        }
    }

    // Uses the 64-bit Arm CRC32 instructions: bytes until the pointer is 8 byte aligned,
    // then whole 64-bit values, then the remaining bytes.
    [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)]
    private static unsafe uint CalculateArm64(uint crc, ReadOnlySpan<byte> buffer)
    {
        fixed (byte* bufferPtr = buffer)
        {
            byte* cursor = bufferPtr;
            int remaining = buffer.Length;

            // Leading bytes up to the next 8 byte boundary.
            for (; remaining > 0 && ((ulong)cursor & 7) != 0; remaining--)
            {
                crc = ArmCrc32.ComputeCrc32(crc, *cursor++);
            }

            ulong* quads = (ulong*)cursor;

            // Unrolled: eight 64-bit values per iteration.
            for (; remaining >= 8 * sizeof(ulong); remaining -= 8 * sizeof(ulong))
            {
                crc = ArmCrc32.Arm64.ComputeCrc32(crc, quads[0]);
                crc = ArmCrc32.Arm64.ComputeCrc32(crc, quads[1]);
                crc = ArmCrc32.Arm64.ComputeCrc32(crc, quads[2]);
                crc = ArmCrc32.Arm64.ComputeCrc32(crc, quads[3]);
                crc = ArmCrc32.Arm64.ComputeCrc32(crc, quads[4]);
                crc = ArmCrc32.Arm64.ComputeCrc32(crc, quads[5]);
                crc = ArmCrc32.Arm64.ComputeCrc32(crc, quads[6]);
                crc = ArmCrc32.Arm64.ComputeCrc32(crc, quads[7]);
                quads += 8;
            }

            // Remaining whole 64-bit values.
            for (; remaining >= sizeof(ulong); remaining -= sizeof(ulong))
            {
                crc = ArmCrc32.Arm64.ComputeCrc32(crc, *quads++);
            }

            // Trailing bytes.
            cursor = (byte*)quads;
            for (; remaining > 0; remaining--)
            {
                crc = ArmCrc32.ComputeCrc32(crc, *cursor++);
            }

            return crc;
        }
    }

    // Table driven fallback processing one byte per step.
    [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)]
    private static uint CalculateScalar(uint crc, ReadOnlySpan<byte> buffer)
    {
        ref uint table = ref MemoryMarshal.GetReference(CrcTable.AsSpan());

        foreach (byte value in buffer)
        {
            // The low eight bits of (crc ^ value) select the table entry.
            uint index = (crc ^ value) & 0xFF;
            crc = Unsafe.Add(ref table, index) ^ (crc >> 8);
        }

        return crc;
    }
}
|||
Binary file not shown.
@ -1,42 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
#if NET6_0
|
|||
// Licensed to the .NET Foundation under one or more agreements.
|
|||
// The .NET Foundation licenses this file to you under the MIT license.
|
|||
namespace System.Diagnostics.CodeAnalysis
{
    /// <summary>
    /// Marks a byref as escaping, i.e. as not being scoped.
    /// </summary>
    /// <remarks>
    /// <para>
    /// The C# compiler implicitly treats a <see langword="ref"/> as <see langword="scoped"/> in several
    /// situations, which prevents the <see langword="ref"/> from escaping the method.
    /// </para>
    /// <para>
    /// Examples of such situations:
    /// <list type="number">
    /// <item><see langword="this"/> in instance methods of a <see langword="struct"/>.</item>
    /// <item><see langword="ref"/> parameters referring to <see langword="ref"/> <see langword="struct"/> types.</item>
    /// <item><see langword="out"/> parameters.</item>
    /// </list>
    /// </para>
    /// <para>
    /// Apply this attribute where the <see langword="ref"/> should be allowed to escape in those situations.
    /// </para>
    /// <para>
    /// Any use of this attribute affects consumers of the API it is applied to. API authors need to
    /// understand the lifetime implications of applying it and how it may impact their users.
    /// </para>
    /// </remarks>
    [global::System.AttributeUsage(
        global::System.AttributeTargets.Parameter |
        global::System.AttributeTargets.Property |
        global::System.AttributeTargets.Method,
        Inherited = false,
        AllowMultiple = false)]
    internal sealed class UnscopedRefAttribute : global::System.Attribute
    {
    }
}
|||
#endif
|
|||
@ -0,0 +1,94 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats; |
|||
|
|||
/// <summary>
/// Per-frame animation metadata: color table, timing, blending and disposal.
/// </summary>
internal class AnimatedImageFrameMetadata
{
    /// <summary>
    /// Gets or sets the color table of this frame, or <see langword="null"/> when none is set.
    /// </summary>
    public ReadOnlyMemory<Color>? ColorTable { get; set; }

    /// <summary>
    /// Gets or sets the color table mode of this frame.
    /// </summary>
    public FrameColorTableMode ColorTableMode { get; set; }

    /// <summary>
    /// Gets or sets how long this frame lasts.
    /// </summary>
    public TimeSpan Duration { get; set; }

    /// <summary>
    /// Gets or sets the alpha blending mode of this frame.
    /// </summary>
    public FrameBlendMode BlendMode { get; set; }

    /// <summary>
    /// Gets or sets the disposal mode of this frame.
    /// </summary>
    public FrameDisposalMode DisposalMode { get; set; }
}
|||
|
|||
#pragma warning disable SA1201 // Elements should appear in the correct order
internal enum FrameBlendMode
#pragma warning restore SA1201 // Elements should appear in the correct order
{
    /// <summary>
    /// No blending. The current frame is rendered onto the canvas by overwriting the rectangle it covers.
    /// </summary>
    Source = 0,

    /// <summary>
    /// The current frame is blended with the previous frame of the animation sequence, within the
    /// rectangle covered by the current frame.
    /// Wherever the current frame is transparent, the matching areas of the previous frame remain
    /// visible through those transparent regions.
    /// </summary>
    Over = 1
}
|||
|
|||
/// <summary>
/// Describes what happens to the area of a frame once the animation moves on to the next frame.
/// </summary>
internal enum FrameDisposalMode
{
    /// <summary>
    /// No disposal is specified.
    /// The decoder does not have to take any action.
    /// </summary>
    Unspecified = 0,

    /// <summary>
    /// Do not dispose. The current frame is left as it is (neither cleared nor altered) when moving to the
    /// next frame. The next frame is therefore drawn over the current one, and wherever the next frame is
    /// transparent the previous frame stays visible.
    /// </summary>
    DoNotDispose = 1,

    /// <summary>
    /// Restore to background color. On the transition to the next frame, the area occupied by the current
    /// frame is filled with the background color given in the image metadata.
    /// In effect the current frame is erased with the background color before the next frame is shown.
    /// </summary>
    RestoreToBackground = 2,

    /// <summary>
    /// Restore to previous. The area touched by the current frame is put back to what it was before the
    /// current frame was displayed. The current frame is essentially "undone", and only then is the next
    /// frame drawn. Useful for animations in which only a small part of the image changes between frames.
    /// </summary>
    RestoreToPrevious = 3
}
|||
|
|||
/// <summary>
/// Identifies which color table a frame uses.
/// </summary>
internal enum FrameColorTableMode
{
    /// <summary>
    /// The shared color table from the image metadata is used by the frame.
    /// </summary>
    Global = 0,

    /// <summary>
    /// A color table from the frame's own metadata is used by the frame.
    /// </summary>
    Local = 1
}
|||
@ -0,0 +1,33 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats; |
|||
|
|||
/// <summary>
/// Image-level animation metadata: shared color table, background color and repeat count.
/// </summary>
internal class AnimatedImageMetadata
{
    /// <summary>
    /// Gets or sets the shared color table.
    /// </summary>
    public ReadOnlyMemory<Color>? ColorTable { get; set; }

    /// <summary>
    /// Gets or sets the shared color table mode.
    /// </summary>
    public FrameColorTableMode ColorTableMode { get; set; }

    /// <summary>
    /// Gets or sets the default background color of the canvas when animating.
    /// This color may be used to fill the unused space on the canvas around the frames,
    /// as well as the transparent pixels of the first frame.
    /// The background color is also used when the disposal mode is <see cref="FrameDisposalMode.RestoreToBackground"/>.
    /// </summary>
    public Color BackgroundColor { get; set; }

    /// <summary>
    /// Gets or sets the number of times any animation is repeated.
    /// </summary>
    /// <remarks>
    /// 0 means to repeat indefinitely, count is set as repeat n-1 times. Defaults to 1.
    /// </remarks>
    // The initializer makes the documented default real: without it the property
    // starts at 0, which by the contract above means "repeat indefinitely".
    public ushort RepeatCount { get; set; } = 1;
}
|||
@ -0,0 +1,290 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Buffers; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.Arm; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using SixLabors.ImageSharp.Advanced; |
|||
using SixLabors.ImageSharp.Memory; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats; |
|||
|
|||
/// <summary>
/// Utility methods for animated formats.
/// </summary>
internal static class AnimationUtilities
{
    /// <summary>
    /// Deduplicates pixels between the previous and current frame returning only the changed pixels and bounds.
    /// </summary>
    /// <remarks>
    /// Every row of <paramref name="resultFrame"/> is overwritten. The returned bounds are always clamped
    /// into <paramref name="resultFrame"/> and are at least one pixel wide and high, even when no difference
    /// was found, so callers should check the <c>Difference</c> flag before relying on them.
    /// </remarks>
    /// <typeparam name="TPixel">The type of pixel format.</typeparam>
    /// <param name="configuration">The configuration.</param>
    /// <param name="previousFrame">The previous frame if present.</param>
    /// <param name="currentFrame">The current frame.</param>
    /// <param name="nextFrame">The next frame if present.</param>
    /// <param name="resultFrame">The resultant output.</param>
    /// <param name="replacement">The value to use when replacing duplicate pixels.</param>
    /// <param name="blend">Whether the resultant frame represents an animation blend.</param>
    /// <param name="clampingMode">The clamping bound to apply when calculating difference bounds.</param>
    /// <returns>The <see cref="ValueTuple{Boolean, Rectangle}"/> representing the operation result.</returns>
    public static (bool Difference, Rectangle Bounds) DeDuplicatePixels<TPixel>(
        Configuration configuration,
        ImageFrame<TPixel>? previousFrame,
        ImageFrame<TPixel> currentFrame,
        ImageFrame<TPixel>? nextFrame,
        ImageFrame<TPixel> resultFrame,
        Color replacement,
        bool blend,
        ClampingMode clampingMode = ClampingMode.None)
        where TPixel : unmanaged, IPixel<TPixel>
    {
        // One allocation holds four row-sized scratch buffers: previous, current, next and result.
        // Each slice is given an explicit length so none of them depends on the owner's exact size.
        int width = currentFrame.Width;
        MemoryAllocator memoryAllocator = configuration.MemoryAllocator;
        using IMemoryOwner<Rgba32> buffers = memoryAllocator.Allocate<Rgba32>(width * 4, AllocationOptions.Clean);
        Span<Rgba32> rows = buffers.GetSpan();
        Span<Rgba32> previous = rows[..width];
        Span<Rgba32> current = rows.Slice(width, width);
        Span<Rgba32> next = rows.Slice(width * 2, width);
        Span<Rgba32> result = rows.Slice(width * 3, width);

        Rgba32 bg = replacement.ToPixel<Rgba32>();
        PixelOperations<TPixel> operations = PixelOperations<TPixel>.Instance;

        // Sentinels; they are clamped into the frame once all rows have been scanned.
        int top = int.MinValue;
        int bottom = int.MaxValue;
        int left = int.MaxValue;
        int right = int.MinValue;

        bool hasDiff = false;
        for (int y = 0; y < currentFrame.Height; y++)
        {
            if (previousFrame is not null)
            {
                operations.ToRgba32(configuration, previousFrame.DangerousGetPixelRowMemory(y).Span, previous);
            }

            operations.ToRgba32(configuration, currentFrame.DangerousGetPixelRowMemory(y).Span, current);

            if (nextFrame is not null)
            {
                operations.ToRgba32(configuration, nextFrame.DangerousGetPixelRowMemory(y).Span, next);
            }

            ref Rgba32 previousBase = ref MemoryMarshal.GetReference(previous);
            ref Rgba32 currentBase = ref MemoryMarshal.GetReference(current);
            ref Rgba32 nextBase = ref MemoryMarshal.GetReference(next);
            ref Rgba32 resultBase = ref MemoryMarshal.GetReference(result);

            ref Vector256<byte> previousBase256 = ref Unsafe.As<Rgba32, Vector256<byte>>(ref previousBase);
            ref Vector256<byte> currentBase256 = ref Unsafe.As<Rgba32, Vector256<byte>>(ref currentBase);
            ref Vector256<byte> nextBase256 = ref Unsafe.As<Rgba32, Vector256<byte>>(ref nextBase);
            ref Vector256<byte> resultBase256 = ref Unsafe.As<Rgba32, Vector256<byte>>(ref resultBase);

            // i is the pixel position within the row; x is the vector index for the SIMD paths
            // and the pixel index for the scalar tail.
            int i = 0;
            uint x = 0;
            bool hasRowDiff = false;
            int length = current.Length;
            int remaining = current.Length;

            if (Avx2.IsSupported && remaining >= 8)
            {
                Vector256<uint> r256 = previousFrame is not null ? Vector256.Create(bg.PackedValue) : Vector256<uint>.Zero;

                // All-ones when blending, all-zeros otherwise; gates the replacement of equal pixels.
                Vector256<uint> vmb256 = Vector256<uint>.Zero;
                if (blend)
                {
                    vmb256 = Avx2.CompareEqual(vmb256, vmb256);
                }

                while (remaining >= 8)
                {
                    Vector256<uint> p = Unsafe.Add(ref previousBase256, x).AsUInt32();
                    Vector256<uint> c = Unsafe.Add(ref currentBase256, x).AsUInt32();

                    Vector256<uint> eq = Avx2.CompareEqual(p, c);
                    Vector256<uint> r = Avx2.BlendVariable(c, r256, Avx2.And(eq, vmb256));

                    if (nextFrame is not null)
                    {
                        // A pixel whose alpha is greater than the next frame's alpha is treated as a difference.
                        Vector256<int> n = Avx2.ShiftRightLogical(Unsafe.Add(ref nextBase256, x).AsUInt32(), 24).AsInt32();
                        eq = Avx2.AndNot(Avx2.CompareGreaterThan(Avx2.ShiftRightLogical(c, 24).AsInt32(), n).AsUInt32(), eq);
                    }

                    Unsafe.Add(ref resultBase256, x) = r.AsByte();

                    // One mask bit per byte, so each pixel contributes sizeof(uint) bits.
                    uint msk = (uint)Avx2.MoveMask(eq.AsByte());
                    msk = ~msk;

                    if (msk != 0)
                    {
                        // If a diff is found, the left side is marked by the min of previously found left side and the start position.
                        // The right is the max of the previously found right side and the end position.
                        int start = i + (BitOperations.TrailingZeroCount(msk) / sizeof(uint));
                        int end = i + (8 - (BitOperations.LeadingZeroCount(msk) / sizeof(uint)));
                        left = Math.Min(left, start);
                        right = Math.Max(right, end);
                        hasRowDiff = true;
                        hasDiff = true;
                    }

                    x++;
                    i += 8;
                    remaining -= 8;
                }
            }

            if (Sse2.IsSupported && remaining >= 4)
            {
                // Update offset since we may be operating on the remainder previously incremented by pixel steps of 8.
                x *= 2;
                Vector128<uint> r128 = previousFrame is not null ? Vector128.Create(bg.PackedValue) : Vector128<uint>.Zero;
                Vector128<uint> vmb128 = Vector128<uint>.Zero;
                if (blend)
                {
                    vmb128 = Sse2.CompareEqual(vmb128, vmb128);
                }

                while (remaining >= 4)
                {
                    Vector128<uint> p = Unsafe.Add(ref Unsafe.As<Vector256<byte>, Vector128<uint>>(ref previousBase256), x);
                    Vector128<uint> c = Unsafe.Add(ref Unsafe.As<Vector256<byte>, Vector128<uint>>(ref currentBase256), x);

                    Vector128<uint> eq = Sse2.CompareEqual(p, c);
                    Vector128<uint> r = SimdUtils.HwIntrinsics.BlendVariable(c, r128, Sse2.And(eq, vmb128));

                    if (nextFrame is not null)
                    {
                        Vector128<int> n = Sse2.ShiftRightLogical(Unsafe.Add(ref Unsafe.As<Vector256<byte>, Vector128<uint>>(ref nextBase256), x), 24).AsInt32();
                        eq = Sse2.AndNot(Sse2.CompareGreaterThan(Sse2.ShiftRightLogical(c, 24).AsInt32(), n).AsUInt32(), eq);
                    }

                    Unsafe.Add(ref Unsafe.As<Vector256<byte>, Vector128<uint>>(ref resultBase256), x) = r;

                    ushort msk = (ushort)(uint)Sse2.MoveMask(eq.AsByte());
                    msk = (ushort)~msk;
                    if (msk != 0)
                    {
                        // If a diff is found, the left side is marked by the min of previously found left side and the start position.
                        // The right is the max of the previously found right side and the end position.
                        int start = i + (SimdUtils.HwIntrinsics.TrailingZeroCount(msk) / sizeof(uint));
                        int end = i + (4 - (SimdUtils.HwIntrinsics.LeadingZeroCount(msk) / sizeof(uint)));
                        left = Math.Min(left, start);
                        right = Math.Max(right, end);
                        hasRowDiff = true;
                        hasDiff = true;
                    }

                    x++;
                    i += 4;
                    remaining -= 4;
                }
            }

            if (AdvSimd.IsSupported && remaining >= 4)
            {
                // Update offset since we may be operating on the remainder previously incremented by pixel steps of 8.
                x *= 2;
                Vector128<uint> r128 = previousFrame is not null ? Vector128.Create(bg.PackedValue) : Vector128<uint>.Zero;
                Vector128<uint> vmb128 = Vector128<uint>.Zero;
                if (blend)
                {
                    vmb128 = AdvSimd.CompareEqual(vmb128, vmb128);
                }

                while (remaining >= 4)
                {
                    Vector128<uint> p = Unsafe.Add(ref Unsafe.As<Vector256<byte>, Vector128<uint>>(ref previousBase256), x);
                    Vector128<uint> c = Unsafe.Add(ref Unsafe.As<Vector256<byte>, Vector128<uint>>(ref currentBase256), x);

                    Vector128<uint> eq = AdvSimd.CompareEqual(p, c);
                    Vector128<uint> r = SimdUtils.HwIntrinsics.BlendVariable(c, r128, AdvSimd.And(eq, vmb128));

                    if (nextFrame is not null)
                    {
                        Vector128<int> n = AdvSimd.ShiftRightLogical(Unsafe.Add(ref Unsafe.As<Vector256<byte>, Vector128<uint>>(ref nextBase256), x), 24).AsInt32();
                        eq = AdvSimd.BitwiseClear(eq, AdvSimd.CompareGreaterThan(AdvSimd.ShiftRightLogical(c, 24).AsInt32(), n).AsUInt32());
                    }

                    Unsafe.Add(ref Unsafe.As<Vector256<byte>, Vector128<uint>>(ref resultBase256), x) = r;

                    // The narrowed comparison yields 16 mask bits per pixel.
                    ulong msk = ~AdvSimd.ExtractNarrowingLower(eq).AsUInt64().ToScalar();
                    if (msk != 0)
                    {
                        // If a diff is found, the left side is marked by the min of previously found left side and the start position.
                        // The right is the max of the previously found right side and the end position.
                        int start = i + (BitOperations.TrailingZeroCount(msk) / 16);
                        int end = i + (4 - (BitOperations.LeadingZeroCount(msk) / 16));
                        left = Math.Min(left, start);
                        right = Math.Max(right, end);
                        hasRowDiff = true;
                        hasDiff = true;
                    }

                    x++;
                    i += 4;
                    remaining -= 4;
                }
            }

            // Scalar tail for whatever the vector paths left over (or the whole row without SIMD).
            // NOTE(review): when previousFrame is null this path compares against bg, whereas the SIMD
            // paths compare against the never-written (presumably zeroed) previous row. Confirm intended.
            for (i = remaining; i > 0; i--)
            {
                x = (uint)(length - i);

                Rgba32 p = Unsafe.Add(ref previousBase, x);
                Rgba32 c = Unsafe.Add(ref currentBase, x);
                Rgba32 n = Unsafe.Add(ref nextBase, x);
                ref Rgba32 r = ref Unsafe.Add(ref resultBase, x);

                bool peq = c.Rgba == (previousFrame is not null ? p.Rgba : bg.Rgba);
                Rgba32 val = (blend & peq) ? bg : c;

                peq &= nextFrame is null || (n.Rgba >> 24 >= c.Rgba >> 24);
                r = val;

                if (!peq)
                {
                    // If a diff is found, the left side is marked by the min of previously found left side and the diff position.
                    // The right is the max of the previously found right side and the diff position + 1.
                    left = Math.Min(left, (int)x);
                    right = Math.Max(right, (int)x + 1);
                    hasRowDiff = true;
                    hasDiff = true;
                }
            }

            if (hasRowDiff)
            {
                // top keeps the first differing row; bottom tracks one past the last differing row.
                if (top == int.MinValue)
                {
                    top = y;
                }

                bottom = y + 1;
            }

            operations.FromRgba32(configuration, result, resultFrame.DangerousGetPixelRowMemory(y).Span);
        }

        // Clamp the edges in order: right and bottom are bounded by the already clamped left and top,
        // which keeps the rectangle inside the frame with a width and height of at least one.
        left = Numerics.Clamp(left, 0, resultFrame.Width - 1);
        top = Numerics.Clamp(top, 0, resultFrame.Height - 1);
        right = Numerics.Clamp(right, left + 1, resultFrame.Width);
        bottom = Numerics.Clamp(bottom, top + 1, resultFrame.Height);
        Rectangle bounds = Rectangle.FromLTRB(left, top, right, bottom);

        // Webp requires even bounds
        if (clampingMode == ClampingMode.Even)
        {
            // An odd origin is moved back by one and the size grown by one, so the far edge stays put.
            bounds.Width = Math.Min(resultFrame.Width, bounds.Width + (bounds.X & 1));
            bounds.Height = Math.Min(resultFrame.Height, bounds.Height + (bounds.Y & 1));
            bounds.X = Math.Max(0, bounds.X - (bounds.X & 1));
            bounds.Y = Math.Max(0, bounds.Y - (bounds.Y & 1));
        }

        return (hasDiff, bounds);
    }
}
|||
|
|||
#pragma warning disable SA1201 // Elements should appear in the correct order
internal enum ClampingMode
#pragma warning restore SA1201 // Elements should appear in the correct order
{
    /// <summary>
    /// The calculated difference bounds are used without any adjustment.
    /// </summary>
    None = 0,

    /// <summary>
    /// The calculated difference bounds are adjusted so that their X and Y origin are even.
    /// </summary>
    Even = 1,
}
|||
@ -0,0 +1,32 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg; |
|||
|
|||
/// <summary>
/// Holds the text of a single JPEG comment.
/// </summary>
public readonly struct JpegComData
{
    /// <summary>
    /// Initializes a new instance of the <see cref="JpegComData"/> struct.
    /// </summary>
    /// <param name="value">The buffer containing the comment characters.</param>
    public JpegComData(ReadOnlyMemory<char> value)
    {
        this.Value = value;
    }

    /// <summary>
    /// Gets the comment characters.
    /// </summary>
    public ReadOnlyMemory<char> Value { get; }

    /// <summary>
    /// Creates a <see cref="JpegComData"/> that wraps the characters of a string.
    /// </summary>
    /// <param name="value">The comment string.</param>
    /// <returns>The <see cref="JpegComData"/> wrapping <paramref name="value"/>.</returns>
    public static JpegComData FromString(string value)
    {
        ReadOnlyMemory<char> characters = value.AsMemory();
        return new JpegComData(characters);
    }

    /// <inheritdoc/>
    public override string ToString()
    {
        return this.Value.ToString();
    }
}
|||
@ -1,53 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.CompilerServices; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
// TODO: Review this class as it's used to represent 2 different things.
|
|||
// 1. The encoded image pixel format.
|
|||
// 2. The pixel format of the decoded image.
|
|||
namespace SixLabors.ImageSharp.Formats; |
|||
|
|||
/// <summary>
/// Describes the pixels that make up the visual data of an image.
/// </summary>
public class PixelTypeInfo
{
    /// <summary>
    /// Initializes a new instance of the <see cref="PixelTypeInfo"/> class
    /// without any alpha representation.
    /// </summary>
    /// <param name="bitsPerPixel">Color depth, in number of bits per pixel.</param>
    public PixelTypeInfo(int bitsPerPixel)
    {
        this.BitsPerPixel = bitsPerPixel;
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="PixelTypeInfo"/> class.
    /// </summary>
    /// <param name="bitsPerPixel">Color depth, in number of bits per pixel.</param>
    /// <param name="alpha">The pixel alpha transparency behavior.</param>
    public PixelTypeInfo(int bitsPerPixel, PixelAlphaRepresentation alpha)
        : this(bitsPerPixel)
    {
        this.AlphaRepresentation = alpha;
    }

    /// <summary>
    /// Gets the color depth, in number of bits per pixel.
    /// </summary>
    public int BitsPerPixel { get; }

    /// <summary>
    /// Gets the pixel alpha transparency behavior.
    /// A value of <see langword="null"/> means unknown or unspecified.
    /// </summary>
    public PixelAlphaRepresentation? AlphaRepresentation { get; }

    // Both factories derive the bit depth from the in-memory size of TPixel (bytes * 8).
    internal static PixelTypeInfo Create<TPixel>()
        where TPixel : unmanaged, IPixel<TPixel>
    {
        int bitsPerPixel = Unsafe.SizeOf<TPixel>() * 8;
        return new PixelTypeInfo(bitsPerPixel);
    }

    internal static PixelTypeInfo Create<TPixel>(PixelAlphaRepresentation alpha)
        where TPixel : unmanaged, IPixel<TPixel>
    {
        int bitsPerPixel = Unsafe.SizeOf<TPixel>() * 8;
        return new PixelTypeInfo(bitsPerPixel, alpha);
    }
}
|||
@ -0,0 +1,30 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Png; |
|||
|
|||
/// <summary>
/// Selects how CRC (Cyclic Redundancy Check) validation failures within the encoded PNG are treated.
/// </summary>
public enum PngCrcChunkHandling
{
    /// <summary>
    /// No CRC chunk error is ignored.
    /// </summary>
    IgnoreNone = 0,

    /// <summary>
    /// CRC errors in non critical chunks are ignored.
    /// </summary>
    IgnoreNonCritical = 1,

    /// <summary>
    /// CRC errors in data chunks are ignored.
    /// </summary>
    IgnoreData = 2,

    /// <summary>
    /// CRC errors in all chunks are ignored.
    /// </summary>
    IgnoreAll = 3
}
|||
@ -0,0 +1,24 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Png; |
|||
|
|||
/// <summary>
/// Options that configure how png images are decoded.
/// </summary>
public sealed class PngDecoderOptions : ISpecializedDecoderOptions
{
    /// <inheritdoc/>
    public DecoderOptions GeneralOptions { get; init; } = new();

    /// <summary>
    /// Gets a value indicating how validation of CRC (Cyclic Redundancy Check) data within the encoded PNG is handled.
    /// Defaults to <see cref="PngCrcChunkHandling.IgnoreNonCritical"/>.
    /// </summary>
    public PngCrcChunkHandling PngCrcChunkHandling { get; init; } = PngCrcChunkHandling.IgnoreNonCritical;

    /// <summary>
    /// Gets the maximum memory in bytes that a zTXt, sPLT, iTXt, iCCP, or unknown chunk can occupy when decompressed.
    /// Defaults to 8MB.
    /// </summary>
    public int MaxUncompressedAncillaryChunkSizeBytes { get; init; } = 8 << 20; // 8MB (8 * 1024 * 1024)
}
|||
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue