|
|
|
@ -4,11 +4,16 @@ |
|
|
|
using System; |
|
|
|
using System.Buffers.Binary; |
|
|
|
using System.Runtime.CompilerServices; |
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
using System.Numerics; |
|
|
|
using System.Runtime.Intrinsics; |
|
|
|
using System.Runtime.Intrinsics.X86; |
|
|
|
#endif
|
|
|
|
|
|
|
|
// ReSharper disable InconsistentNaming
|
|
|
|
namespace SixLabors.ImageSharp.Formats.Webp.Lossy |
|
|
|
{ |
|
|
|
internal static class LossyUtils |
|
|
|
internal static unsafe class LossyUtils |
|
|
|
{ |
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
public static int Vp8Sse16X16(Span<byte> a, Span<byte> b) => GetSse(a, b, 16, 16); |
|
|
|
@ -17,7 +22,57 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy |
|
|
|
public static int Vp8Sse16X8(Span<byte> a, Span<byte> b) => GetSse(a, b, 16, 8); |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
public static int Vp8Sse4X4(Span<byte> a, Span<byte> b) => GetSse(a, b, 4, 4); |
|
|
|
public static int Vp8Sse4X4(Span<byte> a, Span<byte> b) |
|
|
|
{ |
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
if (Sse2.IsSupported) |
|
|
|
{ |
|
|
|
#pragma warning disable SA1503 // Braces should not be omitted
|
|
|
|
Span<int> tmp = stackalloc int[4]; |
|
|
|
fixed (byte* aPtr = a) |
|
|
|
fixed (byte* bPtr = b) |
|
|
|
fixed (int* tmpPtr = tmp) |
|
|
|
{ |
|
|
|
// Load values.
|
|
|
|
Vector128<byte> a0 = Sse2.LoadVector128(aPtr); |
|
|
|
Vector128<byte> a1 = Sse2.LoadVector128(aPtr + WebpConstants.Bps); |
|
|
|
Vector128<byte> a2 = Sse2.LoadVector128(aPtr + (WebpConstants.Bps * 2)); |
|
|
|
Vector128<byte> a3 = Sse2.LoadVector128(aPtr + (WebpConstants.Bps * 3)); |
|
|
|
Vector128<byte> b0 = Sse2.LoadVector128(bPtr); |
|
|
|
Vector128<byte> b1 = Sse2.LoadVector128(bPtr + WebpConstants.Bps); |
|
|
|
Vector128<byte> b2 = Sse2.LoadVector128(bPtr + (WebpConstants.Bps * 2)); |
|
|
|
Vector128<byte> b3 = Sse2.LoadVector128(bPtr + (WebpConstants.Bps * 3)); |
|
|
|
|
|
|
|
// Combine pair of lines.
|
|
|
|
Vector128<int> a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32()); |
|
|
|
Vector128<int> a23 = Sse2.UnpackLow(a2.AsInt32(), a3.AsInt32()); |
|
|
|
Vector128<int> b01 = Sse2.UnpackLow(b0.AsInt32(), b1.AsInt32()); |
|
|
|
Vector128<int> b23 = Sse2.UnpackLow(b2.AsInt32(), b3.AsInt32()); |
|
|
|
|
|
|
|
// Convert to 16b.
|
|
|
|
Vector128<byte> a01s = Sse2.UnpackLow(a01.AsByte(), Vector128<byte>.Zero); |
|
|
|
Vector128<byte> a23s = Sse2.UnpackLow(a23.AsByte(), Vector128<byte>.Zero); |
|
|
|
Vector128<byte> b01s = Sse2.UnpackLow(b01.AsByte(), Vector128<byte>.Zero); |
|
|
|
Vector128<byte> b23s = Sse2.UnpackLow(b23.AsByte(), Vector128<byte>.Zero); |
|
|
|
|
|
|
|
// subtract, square and accumulate.
|
|
|
|
Vector128<byte> d0 = Sse2.SubtractSaturate(a01s, b01s); |
|
|
|
Vector128<byte> d1 = Sse2.SubtractSaturate(a23s, b23s); |
|
|
|
Vector128<int> e0 = Sse2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16()); |
|
|
|
Vector128<int> e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16()); |
|
|
|
Vector128<int> sum = Sse2.Add(e0, e1); |
|
|
|
|
|
|
|
Sse2.Store(tmpPtr, sum); |
|
|
|
return tmp[3] + tmp[2] + tmp[1] + tmp[0]; |
|
|
|
} |
|
|
|
#pragma warning restore SA1503 // Braces should not be omitted
|
|
|
|
} |
|
|
|
else |
|
|
|
#endif
|
|
|
|
{ |
|
|
|
return GetSse(a, b, 4, 4); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
public static int GetSse(Span<byte> a, Span<byte> b, int w, int h) |
|
|
|
|