diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs
index ba5c588ca5..fa0af823d5 100644
--- a/src/ImageSharp/Common/Helpers/Numerics.cs
+++ b/src/ImageSharp/Common/Helpers/Numerics.cs
@@ -820,6 +820,26 @@ namespace SixLabors.ImageSharp
}
}
+ /// <summary>
+ /// Reduces elements of the vector into one sum.
+ /// </summary>
+ /// <param name="accumulator">The accumulator to reduce.</param>
+ /// <returns>The sum of all elements.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int ReduceSum(Vector256<int> accumulator)
+ {
+ // Add upper lane to lower lane: 8 partial sums become 4.
+ Vector128<int> vsum = Sse2.Add(accumulator.GetLower(), accumulator.GetUpper());
+
+ // Add odd to even: element 1 is added into element 0, element 3 into element 2.
+ vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_11_01_01));
+
+ // Add high to low: element 2 is added into element 0, which now holds the total.
+ vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10));
+
+ return Sse2.ConvertToInt32(vsum);
+ }
+
///
/// Reduces even elements of the vector into one sum.
///
diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
index 0ed180a184..ebb198a2d8 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
@@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0.
using System;
+using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory;
@@ -761,28 +762,184 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// <returns>Shannon entropy.</returns>
public static float CombinedShannonEntropy(Span<int> x, Span<int> y)
{
- double retVal = 0.0d;
- uint sumX = 0, sumXY = 0;
- for (int i = 0; i < 256; i++)
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported)
{
- uint xi = (uint)x[i];
- if (xi != 0)
+ double retVal = 0.0d;
+ Vector256<int> tmp = Vector256<int>.Zero; // scratch space of sizeof(int) * 8: receives X + Y for per-element access
+ ref int xRef = ref MemoryMarshal.GetReference(x);
+ ref int yRef = ref MemoryMarshal.GetReference(y);
+ Vector256<int> sumXY256 = Vector256<int>.Zero;
+ Vector256<int> sumX256 = Vector256<int>.Zero;
+ ref int tmpRef = ref Unsafe.As<Vector256<int>, int>(ref tmp);
+ for (nint i = 0; i < 256; i += 8) // NOTE: reads are not bounds checked; x and y must each hold at least 256 elements
{
- uint xy = xi + (uint)y[i];
- sumX += xi;
- retVal -= FastSLog2(xi);
- sumXY += xy;
- retVal -= FastSLog2(xy);
+ Vector256<int> xVec = Unsafe.As<int, Vector256<int>>(ref Unsafe.Add(ref xRef, i));
+ Vector256<int> yVec = Unsafe.As<int, Vector256<int>>(ref Unsafe.Add(ref yRef, i));
+
+ // Check if any X is non-zero: this actually provides a speedup as X is usually sparse.
+ int mask = Avx2.MoveMask(Avx2.CompareEqual(xVec, Vector256<int>.Zero).AsByte());
+ if (mask != -1)
+ {
+ Vector256<int> xy256 = Avx2.Add(xVec, yVec);
+ sumXY256 = Avx2.Add(sumXY256, xy256);
+ sumX256 = Avx2.Add(sumX256, xVec);
+
+ // Spill X + Y to the scratch space, then handle each of the 8 elements individually.
+ Unsafe.As<int, Vector256<int>>(ref tmpRef) = xy256;
+ if (tmpRef != 0)
+ {
+ retVal -= FastSLog2((uint)tmpRef);
+ if (Unsafe.Add(ref xRef, i) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i));
+ }
+ }
+
+ if (Unsafe.Add(ref tmpRef, 1) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 1));
+ if (Unsafe.Add(ref xRef, i + 1) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 1));
+ }
+ }
+
+ if (Unsafe.Add(ref tmpRef, 2) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 2));
+ if (Unsafe.Add(ref xRef, i + 2) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 2));
+ }
+ }
+
+ if (Unsafe.Add(ref tmpRef, 3) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 3));
+ if (Unsafe.Add(ref xRef, i + 3) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 3));
+ }
+ }
+
+ if (Unsafe.Add(ref tmpRef, 4) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 4));
+ if (Unsafe.Add(ref xRef, i + 4) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 4));
+ }
+ }
+
+ if (Unsafe.Add(ref tmpRef, 5) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 5));
+ if (Unsafe.Add(ref xRef, i + 5) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 5));
+ }
+ }
+
+ if (Unsafe.Add(ref tmpRef, 6) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 6));
+ if (Unsafe.Add(ref xRef, i + 6) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 6));
+ }
+ }
+
+ if (Unsafe.Add(ref tmpRef, 7) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 7));
+ if (Unsafe.Add(ref xRef, i + 7) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 7));
+ }
+ }
+ }
+ else
+ {
+ // X is fully 0, so only deal with Y.
+ sumXY256 = Avx2.Add(sumXY256, yVec);
+
+ if (Unsafe.Add(ref yRef, i) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i));
+ }
+
+ if (Unsafe.Add(ref yRef, i + 1) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 1));
+ }
+
+ if (Unsafe.Add(ref yRef, i + 2) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 2));
+ }
+
+ if (Unsafe.Add(ref yRef, i + 3) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 3));
+ }
+
+ if (Unsafe.Add(ref yRef, i + 4) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 4));
+ }
+
+ if (Unsafe.Add(ref yRef, i + 5) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 5));
+ }
+
+ if (Unsafe.Add(ref yRef, i + 6) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 6));
+ }
+
+ if (Unsafe.Add(ref yRef, i + 7) != 0)
+ {
+ retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 7));
+ }
+ }
}
- else if (y[i] != 0)
+
+ // Sum up sumX256 to get sumX and sum up sumXY256 to get sumXY.
+ int sumX = Numerics.ReduceSum(sumX256);
+ int sumXY = Numerics.ReduceSum(sumXY256);
+
+ retVal += FastSLog2((uint)sumX) + FastSLog2((uint)sumXY);
+
+ return (float)retVal;
+ }
+ else
+#endif
+ {
+ double retVal = 0.0d;
+ uint sumX = 0, sumXY = 0;
+ for (int i = 0; i < 256; i++)
{
- sumXY += (uint)y[i];
- retVal -= FastSLog2((uint)y[i]);
+ uint xi = (uint)x[i];
+ if (xi != 0)
+ {
+ uint xy = xi + (uint)y[i];
+ sumX += xi;
+ retVal -= FastSLog2(xi);
+ sumXY += xy;
+ retVal -= FastSLog2(xy);
+ }
+ else if (y[i] != 0)
+ {
+ sumXY += (uint)y[i];
+ retVal -= FastSLog2((uint)y[i]);
+ }
}
- }
- retVal += FastSLog2(sumX) + FastSLog2(sumXY);
- return (float)retVal;
+ retVal += FastSLog2(sumX) + FastSLog2(sumXY);
+ return (float)retVal;
+ }
}
[MethodImpl(InliningOptions.ShortMethod)]
@@ -838,6 +995,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static float FastSLog2Slow(uint v)
{
DebugGuard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
+
if (v < ApproxLogWithCorrectionMax)
{
int logCnt = 0;
@@ -867,7 +1025,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static float FastLog2Slow(uint v)
{
- Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
+ DebugGuard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
if (v < ApproxLogWithCorrectionMax)
{
diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs
index 125645c7d0..41ae848427 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs
@@ -795,7 +795,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
uint v = src[0] * 0x01010101u;
Span vSpan = BitConverter.GetBytes(v).AsSpan();
- for (int i = 0; i < 16; i++)
+ for (nint i = 0; i < 16; i++)
{
if (!src.Slice(0, 4).SequenceEqual(vSpan) || !src.Slice(4, 4).SequenceEqual(vSpan) ||
!src.Slice(8, 4).SequenceEqual(vSpan) || !src.Slice(12, 4).SequenceEqual(vSpan))
@@ -813,19 +813,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
private static bool IsFlat(Span<short> levels, int numBlocks, int thresh)
{
int score = 0;
+ ref short levelsRef = ref MemoryMarshal.GetReference(levels);
+ int offset = 0;
while (numBlocks-- > 0)
{
- for (int i = 1; i < 16; i++)
+ for (nint i = 1; i < 16; i++)
{
// omit DC, we're only interested in AC
- score += levels[i] != 0 ? 1 : 0;
+ score += Unsafe.Add(ref levelsRef, offset + i) != 0 ? 1 : 0; // coefficient i of the current 16-entry block (offset alone would re-read the DC value)
if (score > thresh)
{
return false;
}
}
- levels = levels.Slice(16);
+ offset += 16;
}
return true;
diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
index aa4ab5767b..f12a1a7855 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
@@ -15,7 +15,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
///
/// Methods for encoding a VP8 frame.
///
- internal static class Vp8Encoding
+ internal static unsafe class Vp8Encoding
{
private const int KC1 = 20091 + (1 << 16);
@@ -66,11 +66,39 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public static readonly int[] Vp8I4ModeOffsets = { I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4 };
#if SUPPORTS_RUNTIME_INTRINSICS
- public static readonly Vector128<short> K1 = Vector128.Create((short)20091).AsInt16();
+#pragma warning disable SA1310 // Field names should not contain underscore
+ private static readonly Vector128<short> K1 = Vector128.Create((short)20091).AsInt16();
- public static readonly Vector128<short> K2 = Vector128.Create((short)-30068).AsInt16();
+ private static readonly Vector128<short> K2 = Vector128.Create((short)-30068).AsInt16();
- public static readonly Vector128<short> Four = Vector128.Create((short)4);
+ private static readonly Vector128<short> Four = Vector128.Create((short)4);
+
+ private static readonly Vector128<short> Seven = Vector128.Create((short)7);
+
+ private static readonly Vector128<short> K88p = Vector128.Create(8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0).AsInt16(); // little-endian 16-bit pairs (8, 8)
+
+ private static readonly Vector128<short> K88m = Vector128.Create(8, 0, 248, 255, 8, 0, 248, 255, 8, 0, 248, 255, 8, 0, 248, 255).AsInt16(); // 16-bit pairs (8, -8)
+
+ private static readonly Vector128<short> K5352_2217p = Vector128.Create(232, 20, 169, 8, 232, 20, 169, 8, 232, 20, 169, 8, 232, 20, 169, 8).AsInt16(); // 16-bit pairs (5352, 2217)
+
+ private static readonly Vector128<short> K5352_2217m = Vector128.Create(169, 8, 24, 235, 169, 8, 24, 235, 169, 8, 24, 235, 169, 8, 24, 235).AsInt16(); // 16-bit pairs (2217, -5352)
+
+ private static readonly Vector128<int> K937 = Vector128.Create(937);
+
+ private static readonly Vector128<int> K1812 = Vector128.Create(1812);
+
+ private static readonly Vector128<short> K5352_2217 = Vector128.Create(169, 8, 232, 20, 169, 8, 232, 20, 169, 8, 232, 20, 169, 8, 232, 20).AsInt16(); // 16-bit pairs (2217, 5352)
+
+ private static readonly Vector128<short> K2217_5352 = Vector128.Create(24, 235, 169, 8, 24, 235, 169, 8, 24, 235, 169, 8, 24, 235, 169, 8).AsInt16(); // 16-bit pairs (-5352, 2217)
+
+ private static readonly Vector128<int> K12000PlusOne = Vector128.Create(12000 + (1 << 16)); // the extra 1 << 16 becomes +1 after the >> 16 in FTransformPass2SSE2
+
+ private static readonly Vector128<int> K51000 = Vector128.Create(51000);
+
+ private static readonly byte MmShuffle2301 = SimdUtils.Shuffle.MmShuffle(2, 3, 0, 1);
+
+ private static readonly byte MmShuffle1032 = SimdUtils.Shuffle.MmShuffle(1, 0, 3, 2);
+#pragma warning restore SA1310 // Field names should not contain underscore
#endif
static Vp8Encoding()
@@ -376,49 +404,246 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public static void FTransform2(Span<byte> src, Span<byte> reference, Span<short> output, Span<short> output2, Span<int> scratch)
{
- FTransform(src, reference, output, scratch);
- FTransform(src.Slice(4), reference.Slice(4), output2, scratch);
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported)
+ {
+ ref byte srcRef = ref MemoryMarshal.GetReference(src);
+ ref byte referenceRef = ref MemoryMarshal.GetReference(reference);
+
+ // Load src: 8 bytes per row, i.e. the 4 pixels of both adjacent 4x4 blocks.
+ var src0 = Vector128.Create(Unsafe.As<byte, long>(ref srcRef), 0);
+ var src1 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref srcRef, WebpConstants.Bps)), 0);
+ var src2 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref srcRef, WebpConstants.Bps * 2)), 0);
+ var src3 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref srcRef, WebpConstants.Bps * 3)), 0);
+
+ // Load ref.
+ var ref0 = Vector128.Create(Unsafe.As<byte, long>(ref referenceRef), 0);
+ var ref1 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps)), 0);
+ var ref2 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 2)), 0);
+ var ref3 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 3)), 0);
+
+ // Convert both to 16 bit.
+ Vector128<byte> srcLow0 = Sse2.UnpackLow(src0.AsByte(), Vector128<byte>.Zero);
+ Vector128<byte> srcLow1 = Sse2.UnpackLow(src1.AsByte(), Vector128<byte>.Zero);
+ Vector128<byte> srcLow2 = Sse2.UnpackLow(src2.AsByte(), Vector128<byte>.Zero);
+ Vector128<byte> srcLow3 = Sse2.UnpackLow(src3.AsByte(), Vector128<byte>.Zero);
+ Vector128<byte> refLow0 = Sse2.UnpackLow(ref0.AsByte(), Vector128<byte>.Zero);
+ Vector128<byte> refLow1 = Sse2.UnpackLow(ref1.AsByte(), Vector128<byte>.Zero);
+ Vector128<byte> refLow2 = Sse2.UnpackLow(ref2.AsByte(), Vector128<byte>.Zero);
+ Vector128<byte> refLow3 = Sse2.UnpackLow(ref3.AsByte(), Vector128<byte>.Zero);
+
+ // Compute difference. -> 00 01 02 03 00' 01' 02' 03'
+ Vector128<short> diff0 = Sse2.Subtract(srcLow0.AsInt16(), refLow0.AsInt16());
+ Vector128<short> diff1 = Sse2.Subtract(srcLow1.AsInt16(), refLow1.AsInt16());
+ Vector128<short> diff2 = Sse2.Subtract(srcLow2.AsInt16(), refLow2.AsInt16());
+ Vector128<short> diff3 = Sse2.Subtract(srcLow3.AsInt16(), refLow3.AsInt16());
+
+ // Unpack and shuffle.
+ // 00 01 02 03 0 0 0 0
+ // 10 11 12 13 0 0 0 0
+ // 20 21 22 23 0 0 0 0
+ // 30 31 32 33 0 0 0 0
+ Vector128<int> shuf01l = Sse2.UnpackLow(diff0.AsInt32(), diff1.AsInt32());
+ Vector128<int> shuf23l = Sse2.UnpackLow(diff2.AsInt32(), diff3.AsInt32());
+ Vector128<int> shuf01h = Sse2.UnpackHigh(diff0.AsInt32(), diff1.AsInt32());
+ Vector128<int> shuf23h = Sse2.UnpackHigh(diff2.AsInt32(), diff3.AsInt32());
+
+ // First pass.
+ FTransformPass1SSE2(shuf01l.AsInt16(), shuf23l.AsInt16(), out Vector128<int> v01l, out Vector128<int> v32l);
+ FTransformPass1SSE2(shuf01h.AsInt16(), shuf23h.AsInt16(), out Vector128<int> v01h, out Vector128<int> v32h);
+
+ // Second pass.
+ FTransformPass2SSE2(v01l, v32l, output);
+ FTransformPass2SSE2(v01h, v32h, output2);
+ }
+ else
+#endif
+ {
+ FTransform(src, reference, output, scratch);
+ FTransform(src.Slice(4), reference.Slice(4), output2, scratch);
+ }
}
public static void FTransform(Span<byte> src, Span<byte> reference, Span<short> output, Span<int> scratch)
{
- int i;
- Span<int> tmp = scratch.Slice(0, 16);
-
- int srcIdx = 0;
- int refIdx = 0;
- for (i = 0; i < 4; i++)
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported)
{
- int d0 = src[srcIdx] - reference[refIdx]; // 9bit dynamic range ([-255,255])
- int d1 = src[srcIdx + 1] - reference[refIdx + 1];
- int d2 = src[srcIdx + 2] - reference[refIdx + 2];
- int d3 = src[srcIdx + 3] - reference[refIdx + 3];
- int a0 = d0 + d3; // 10b [-510,510]
- int a1 = d1 + d2;
- int a2 = d1 - d2;
- int a3 = d0 - d3;
- tmp[0 + (i * 4)] = (a0 + a1) * 8; // 14b [-8160,8160]
- tmp[1 + (i * 4)] = ((a2 * 2217) + (a3 * 5352) + 1812) >> 9; // [-7536,7542]
- tmp[2 + (i * 4)] = (a0 - a1) * 8;
- tmp[3 + (i * 4)] = ((a3 * 2217) - (a2 * 5352) + 937) >> 9;
-
- srcIdx += WebpConstants.Bps;
- refIdx += WebpConstants.Bps;
- }
+ ref byte srcRef = ref MemoryMarshal.GetReference(src);
+ ref byte referenceRef = ref MemoryMarshal.GetReference(reference);
- for (i = 0; i < 4; i++)
+ // Load src.
+ var src0 = Vector128.Create(Unsafe.As<byte, long>(ref srcRef), 0);
+ var src1 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref srcRef, WebpConstants.Bps)), 0);
+ var src2 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref srcRef, WebpConstants.Bps * 2)), 0);
+ var src3 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref srcRef, WebpConstants.Bps * 3)), 0);
+
+ // Load ref.
+ var ref0 = Vector128.Create(Unsafe.As<byte, long>(ref referenceRef), 0);
+ var ref1 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps)), 0);
+ var ref2 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 2)), 0);
+ var ref3 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 3)), 0);
+
+ // 00 01 02 03 *
+ // 10 11 12 13 *
+ // 20 21 22 23 *
+ // 30 31 32 33 *
+ // Shuffle.
+ Vector128<short> srcLow0 = Sse2.UnpackLow(src0.AsInt16(), src1.AsInt16());
+ Vector128<short> srcLow1 = Sse2.UnpackLow(src2.AsInt16(), src3.AsInt16());
+ Vector128<short> refLow0 = Sse2.UnpackLow(ref0.AsInt16(), ref1.AsInt16());
+ Vector128<short> refLow1 = Sse2.UnpackLow(ref2.AsInt16(), ref3.AsInt16());
+
+ // 00 01 10 11 02 03 12 13 * * ...
+ // 20 21 30 31 22 23 32 33 * * ...
+
+ // Convert both to 16 bit.
+ Vector128<byte> src0_16b = Sse2.UnpackLow(srcLow0.AsByte(), Vector128<byte>.Zero);
+ Vector128<byte> src1_16b = Sse2.UnpackLow(srcLow1.AsByte(), Vector128<byte>.Zero);
+ Vector128<byte> ref0_16b = Sse2.UnpackLow(refLow0.AsByte(), Vector128<byte>.Zero);
+ Vector128<byte> ref1_16b = Sse2.UnpackLow(refLow1.AsByte(), Vector128<byte>.Zero);
+
+ // Compute the difference.
+ Vector128<short> row01 = Sse2.Subtract(src0_16b.AsInt16(), ref0_16b.AsInt16());
+ Vector128<short> row23 = Sse2.Subtract(src1_16b.AsInt16(), ref1_16b.AsInt16());
+
+ // First pass.
+ FTransformPass1SSE2(row01, row23, out Vector128<int> v01, out Vector128<int> v32);
+
+ // Second pass.
+ FTransformPass2SSE2(v01, v32, output);
+ }
+ else
+#endif
{
- int a0 = tmp[0 + i] + tmp[12 + i]; // 15b
- int a1 = tmp[4 + i] + tmp[8 + i];
- int a2 = tmp[4 + i] - tmp[8 + i];
- int a3 = tmp[0 + i] - tmp[12 + i];
- output[0 + i] = (short)((a0 + a1 + 7) >> 4); // 12b
- output[4 + i] = (short)((((a2 * 2217) + (a3 * 5352) + 12000) >> 16) + (a3 != 0 ? 1 : 0));
- output[8 + i] = (short)((a0 - a1 + 7) >> 4);
- output[12 + i] = (short)(((a3 * 2217) - (a2 * 5352) + 51000) >> 16);
+ int i;
+ Span<int> tmp = scratch.Slice(0, 16);
+
+ int srcIdx = 0;
+ int refIdx = 0;
+ for (i = 0; i < 4; i++)
+ {
+ int d3 = src[srcIdx + 3] - reference[refIdx + 3];
+ int d2 = src[srcIdx + 2] - reference[refIdx + 2];
+ int d1 = src[srcIdx + 1] - reference[refIdx + 1];
+ int d0 = src[srcIdx] - reference[refIdx]; // 9bit dynamic range ([-255,255])
+ int a0 = d0 + d3; // 10b [-510,510]
+ int a1 = d1 + d2;
+ int a2 = d1 - d2;
+ int a3 = d0 - d3;
+ tmp[3 + (i * 4)] = ((a3 * 2217) - (a2 * 5352) + 937) >> 9;
+ tmp[2 + (i * 4)] = (a0 - a1) * 8;
+ tmp[1 + (i * 4)] = ((a2 * 2217) + (a3 * 5352) + 1812) >> 9; // [-7536,7542]
+ tmp[0 + (i * 4)] = (a0 + a1) * 8; // 14b [-8160,8160]
+
+ srcIdx += WebpConstants.Bps;
+ refIdx += WebpConstants.Bps;
+ }
+
+ for (i = 0; i < 4; i++)
+ {
+ int t12 = tmp[12 + i]; // 15b
+ int t8 = tmp[8 + i];
+
+ int a1 = tmp[4 + i] + t8;
+ int a2 = tmp[4 + i] - t8;
+ int a0 = tmp[0 + i] + t12; // 15b
+ int a3 = tmp[0 + i] - t12;
+
+ output[12 + i] = (short)(((a3 * 2217) - (a2 * 5352) + 51000) >> 16);
+ output[8 + i] = (short)((a0 - a1 + 7) >> 4);
+ output[4 + i] = (short)((((a2 * 2217) + (a3 * 5352) + 12000) >> 16) + (a3 != 0 ? 1 : 0));
+ output[0 + i] = (short)((a0 + a1 + 7) >> 4); // 12b
+ }
}
}
+#if SUPPORTS_RUNTIME_INTRINSICS
+ public static void FTransformPass1SSE2(Vector128<short> row01, Vector128<short> row23, out Vector128<int> out01, out Vector128<int> out32)
+ {
+ // row01 = 00 01 10 11 02 03 12 13
+ // row23 = 20 21 30 31 22 23 32 33
+ Vector128<short> shuf01_p = Sse2.ShuffleHigh(row01, MmShuffle2301);
+ Vector128<short> shuf32_p = Sse2.ShuffleHigh(row23, MmShuffle2301);
+
+ // 00 01 10 11 03 02 13 12
+ // 20 21 30 31 23 22 33 32
+ Vector128<long> s01 = Sse2.UnpackLow(shuf01_p.AsInt64(), shuf32_p.AsInt64());
+ Vector128<long> s32 = Sse2.UnpackHigh(shuf01_p.AsInt64(), shuf32_p.AsInt64());
+
+ // 00 01 10 11 20 21 30 31
+ // 03 02 13 12 23 22 33 32
+ Vector128<short> a01 = Sse2.Add(s01.AsInt16(), s32.AsInt16());
+ Vector128<short> a32 = Sse2.Subtract(s01.AsInt16(), s32.AsInt16());
+
+ // [d0 + d3 | d1 + d2 | ...] = [a0 a1 | a0' a1' | ... ]
+ // [d0 - d3 | d1 - d2 | ...] = [a3 a2 | a3' a2' | ... ]
+ Vector128<int> tmp0 = Sse2.MultiplyAddAdjacent(a01, K88p); // [ (a0 + a1) << 3, ... ]
+ Vector128<int> tmp2 = Sse2.MultiplyAddAdjacent(a01, K88m); // [ (a0 - a1) << 3, ... ]
+ Vector128<int> tmp11 = Sse2.MultiplyAddAdjacent(a32, K5352_2217p);
+ Vector128<int> tmp31 = Sse2.MultiplyAddAdjacent(a32, K5352_2217m);
+ Vector128<int> tmp12 = Sse2.Add(tmp11, K1812);
+ Vector128<int> tmp32 = Sse2.Add(tmp31, K937);
+ Vector128<int> tmp1 = Sse2.ShiftRightArithmetic(tmp12, 9);
+ Vector128<int> tmp3 = Sse2.ShiftRightArithmetic(tmp32, 9);
+ Vector128<short> s03 = Sse2.PackSignedSaturate(tmp0, tmp2);
+ Vector128<short> s12 = Sse2.PackSignedSaturate(tmp1, tmp3);
+ Vector128<short> slo = Sse2.UnpackLow(s03, s12); // 0 1 0 1 0 1...
+ Vector128<short> shi = Sse2.UnpackHigh(s03, s12); // 2 3 2 3 2 3
+ Vector128<int> v23 = Sse2.UnpackHigh(slo.AsInt32(), shi.AsInt32());
+ out01 = Sse2.UnpackLow(slo.AsInt32(), shi.AsInt32());
+ out32 = Sse2.Shuffle(v23, MmShuffle1032);
+ }
+
+ public static void FTransformPass2SSE2(Vector128<int> v01, Vector128<int> v32, Span<short> output)
+ {
+ // Same operations are done on the (0,3) and (1,2) pairs.
+ // a3 = v0 - v3
+ // a2 = v1 - v2
+ Vector128<short> a32 = Sse2.Subtract(v01.AsInt16(), v32.AsInt16());
+ Vector128<long> a22 = Sse2.UnpackHigh(a32.AsInt64(), a32.AsInt64());
+
+ Vector128<short> b23 = Sse2.UnpackLow(a22.AsInt16(), a32.AsInt16());
+ Vector128<int> c1 = Sse2.MultiplyAddAdjacent(b23, K5352_2217);
+ Vector128<int> c3 = Sse2.MultiplyAddAdjacent(b23, K2217_5352);
+ Vector128<int> d1 = Sse2.Add(c1, K12000PlusOne);
+ Vector128<int> d3 = Sse2.Add(c3, K51000);
+ Vector128<int> e1 = Sse2.ShiftRightArithmetic(d1, 16);
+ Vector128<int> e3 = Sse2.ShiftRightArithmetic(d3, 16);
+
+ // f1 = ((b3 * 5352 + b2 * 2217 + 12000) >> 16)
+ // f3 = ((b3 * 2217 - b2 * 5352 + 51000) >> 16)
+ Vector128<short> f1 = Sse2.PackSignedSaturate(e1, e1);
+ Vector128<short> f3 = Sse2.PackSignedSaturate(e3, e3);
+
+ // g1 = f1 + (a3 != 0);
+ // The compare will return (0xffff, 0) for (==0, !=0). To turn that into the
+ // desired (0, 1), we add one earlier through K12000PlusOne.
+ // -> g1 = f1 + 1 - (a3 == 0)
+ Vector128<short> g1 = Sse2.Add(f1, Sse2.CompareEqual(a32, Vector128<short>.Zero));
+
+ // a0 = v0 + v3
+ // a1 = v1 + v2
+ Vector128<short> a01 = Sse2.Add(v01.AsInt16(), v32.AsInt16());
+ Vector128<short> a01Plus7 = Sse2.Add(a01.AsInt16(), Seven);
+ Vector128<short> a11 = Sse2.UnpackHigh(a01.AsInt64(), a01.AsInt64()).AsInt16();
+ Vector128<short> c0 = Sse2.Add(a01Plus7, a11);
+ Vector128<short> c2 = Sse2.Subtract(a01Plus7, a11);
+
+ // d0 = (a0 + a1 + 7) >> 4;
+ // d2 = (a0 - a1 + 7) >> 4;
+ Vector128<short> d0 = Sse2.ShiftRightArithmetic(c0, 4);
+ Vector128<short> d2 = Sse2.ShiftRightArithmetic(c2, 4);
+
+ Vector128<long> d0g1 = Sse2.UnpackLow(d0.AsInt64(), g1.AsInt64());
+ Vector128<long> d2f3 = Sse2.UnpackLow(d2.AsInt64(), f3.AsInt64());
+
+ ref short outputRef = ref MemoryMarshal.GetReference(output);
+ Unsafe.As<short, Vector128<short>>(ref outputRef) = d0g1.AsInt16();
+ Unsafe.As<short, Vector128<short>>(ref Unsafe.Add(ref outputRef, 8)) = d2f3.AsInt16();
+ }
+#endif
+
public static void FTransformWht(Span input, Span output, Span scratch)
{
Span tmp = scratch.Slice(0, 16);
@@ -427,32 +652,37 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int inputIdx = 0;
for (i = 0; i < 4; i++)
{
- int a0 = input[inputIdx + (0 * 16)] + input[inputIdx + (2 * 16)]; // 13b
int a1 = input[inputIdx + (1 * 16)] + input[inputIdx + (3 * 16)];
int a2 = input[inputIdx + (1 * 16)] - input[inputIdx + (3 * 16)];
+ int a0 = input[inputIdx + (0 * 16)] + input[inputIdx + (2 * 16)]; // 13b
int a3 = input[inputIdx + (0 * 16)] - input[inputIdx + (2 * 16)];
- tmp[0 + (i * 4)] = a0 + a1; // 14b
- tmp[1 + (i * 4)] = a3 + a2;
- tmp[2 + (i * 4)] = a3 - a2;
tmp[3 + (i * 4)] = a0 - a1;
+ tmp[2 + (i * 4)] = a3 - a2;
+ tmp[1 + (i * 4)] = a3 + a2;
+ tmp[0 + (i * 4)] = a0 + a1; // 14b
inputIdx += 64;
}
for (i = 0; i < 4; i++)
{
- int a0 = tmp[0 + i] + tmp[8 + i]; // 15b
- int a1 = tmp[4 + i] + tmp[12 + i];
- int a2 = tmp[4 + i] - tmp[12 + i];
- int a3 = tmp[0 + i] - tmp[8 + i];
+ int t12 = tmp[12 + i];
+ int t8 = tmp[8 + i];
+
+ int a1 = tmp[4 + i] + t12;
+ int a2 = tmp[4 + i] - t12;
+ int a0 = tmp[0 + i] + t8; // 15b
+ int a3 = tmp[0 + i] - t8;
+
int b0 = a0 + a1; // 16b
int b1 = a3 + a2;
int b2 = a3 - a2;
int b3 = a0 - a1;
- output[0 + i] = (short)(b0 >> 1); // 15b
- output[4 + i] = (short)(b1 >> 1);
- output[8 + i] = (short)(b2 >> 1);
+
output[12 + i] = (short)(b3 >> 1);
+ output[8 + i] = (short)(b2 >> 1);
+ output[4 + i] = (short)(b1 >> 1);
+ output[0 + i] = (short)(b0 >> 1); // 15b
}
}
diff --git a/src/ImageSharp/Image.FromFile.cs b/src/ImageSharp/Image.FromFile.cs
index 3a4b459c54..fce0835fba 100644
--- a/src/ImageSharp/Image.FromFile.cs
+++ b/src/ImageSharp/Image.FromFile.cs
@@ -255,6 +255,7 @@ namespace SixLabors.ImageSharp
///
/// The file path to the image.
/// The decoder.
+ /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// The configuration is null.
/// The path is null.
/// The decoder is null.
@@ -262,14 +263,15 @@ namespace SixLabors.ImageSharp
/// Image format is not supported.
/// Image contains invalid content.
/// A representing the asynchronous operation.
- public static Task<Image> LoadAsync(string path, IImageDecoder decoder)
- => LoadAsync(Configuration.Default, path, decoder, default);
+ public static Task<Image> LoadAsync(string path, IImageDecoder decoder, CancellationToken cancellationToken = default)
+ => LoadAsync(Configuration.Default, path, decoder, cancellationToken);
///
/// Create a new instance of the class from the given file.
///
/// The file path to the image.
/// The decoder.
+ /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// The configuration is null.
/// The path is null.
/// The decoder is null.
@@ -278,9 +280,9 @@ namespace SixLabors.ImageSharp
/// Image contains invalid content.
/// The pixel format.
/// A representing the asynchronous operation.
- public static Task<Image<TPixel>> LoadAsync<TPixel>(string path, IImageDecoder decoder)
+ public static Task<Image<TPixel>> LoadAsync<TPixel>(string path, IImageDecoder decoder, CancellationToken cancellationToken = default)
where TPixel : unmanaged, IPixel<TPixel>
- => LoadAsync<TPixel>(Configuration.Default, path, decoder, default);
+ => LoadAsync<TPixel>(Configuration.Default, path, decoder, cancellationToken);
///
/// Create a new instance of the class from the given file.
@@ -342,6 +344,7 @@ namespace SixLabors.ImageSharp
/// Create a new instance of the class from the given file.
///
/// The file path to the image.
+ /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// The configuration is null.
/// The path is null.
/// Image format not recognised.
@@ -349,9 +352,9 @@ namespace SixLabors.ImageSharp
/// Image format is not supported.
/// The pixel format.
/// A representing the asynchronous operation.
- public static Task<Image<TPixel>> LoadAsync<TPixel>(string path)
+ public static Task<Image<TPixel>> LoadAsync<TPixel>(string path, CancellationToken cancellationToken = default)
where TPixel : unmanaged, IPixel<TPixel>
- => LoadAsync<TPixel>(Configuration.Default, path, default(CancellationToken));
+ => LoadAsync<TPixel>(Configuration.Default, path, cancellationToken);
///
/// Create a new instance of the class from the given file.
diff --git a/src/ImageSharp/Image.FromStream.cs b/src/ImageSharp/Image.FromStream.cs
index 291d6f7cab..f5e32d8ce0 100644
--- a/src/ImageSharp/Image.FromStream.cs
+++ b/src/ImageSharp/Image.FromStream.cs
@@ -44,27 +44,29 @@ namespace SixLabors.ImageSharp
/// By reading the header on the provided stream this calculates the images format type.
///
/// The image stream to read the header from.
+ /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// The stream is null.
/// The stream is not readable.
/// A representing the asynchronous operation or null if none is found.
- public static Task<IImageFormat> DetectFormatAsync(Stream stream)
- => DetectFormatAsync(Configuration.Default, stream);
+ public static Task<IImageFormat> DetectFormatAsync(Stream stream, CancellationToken cancellationToken = default)
+ => DetectFormatAsync(Configuration.Default, stream, cancellationToken);
///
/// By reading the header on the provided stream this calculates the images format type.
///
/// The configuration.
/// The image stream to read the header from.
+ /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// The configuration is null.
/// The stream is null.
/// The stream is not readable.
/// A representing the asynchronous operation.
- public static Task<IImageFormat> DetectFormatAsync(Configuration configuration, Stream stream)
+ public static Task<IImageFormat> DetectFormatAsync(Configuration configuration, Stream stream, CancellationToken cancellationToken = default)
=> WithSeekableStreamAsync(
configuration,
stream,
(s, _) => InternalDetectFormatAsync(s, configuration),
- default);
+ cancellationToken);
///
/// Reads the raw image information from the specified stream without fully decoding it.
@@ -83,6 +85,7 @@ namespace SixLabors.ImageSharp
/// Reads the raw image information from the specified stream without fully decoding it.
///
/// The image stream to read the header from.
+ /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// The stream is null.
/// The stream is not readable.
/// Image contains invalid content.
@@ -90,8 +93,8 @@ namespace SixLabors.ImageSharp
/// A representing the asynchronous operation or null if
/// a suitable detector is not found.
///
- public static Task<IImageInfo> IdentifyAsync(Stream stream)
- => IdentifyAsync(Configuration.Default, stream);
+ public static Task<IImageInfo> IdentifyAsync(Stream stream, CancellationToken cancellationToken = default)
+ => IdentifyAsync(Configuration.Default, stream, cancellationToken);
///
/// Reads the raw image information from the specified stream without fully decoding it.
@@ -227,13 +230,14 @@ namespace SixLabors.ImageSharp
/// The pixel format is selected by the decoder.
///
/// The stream containing image information.
+ /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// The stream is null.
/// The stream is not readable or the image format is not supported.
/// Image format not recognised.
/// Image contains invalid content.
/// A representing the asynchronous operation.
- public static Task<(Image Image, IImageFormat Format)> LoadWithFormatAsync(Stream stream)
- => LoadWithFormatAsync(Configuration.Default, stream);
+ public static Task<(Image Image, IImageFormat Format)> LoadWithFormatAsync(Stream stream, CancellationToken cancellationToken = default)
+ => LoadWithFormatAsync(Configuration.Default, stream, cancellationToken);
///
/// Decode a new instance of the class from the given stream.
@@ -252,12 +256,14 @@ namespace SixLabors.ImageSharp
/// The pixel format is selected by the decoder.
///
/// The stream containing image information.
+ /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// The stream is null.
/// The stream is not readable or the image format is not supported.
/// Image format not recognised.
/// Image contains invalid content.
/// A representing the asynchronous operation.
- public static Task<Image> LoadAsync(Stream stream) => LoadAsync(Configuration.Default, stream);
+ public static Task<Image> LoadAsync(Stream stream, CancellationToken cancellationToken = default)
+ => LoadAsync(Configuration.Default, stream, cancellationToken);
///
/// Decode a new instance of the class from the given stream.
@@ -280,14 +286,15 @@ namespace SixLabors.ImageSharp
///
/// The stream containing image information.
/// The decoder.
+ /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// The stream is null.
/// The decoder is null.
/// The stream is not readable or the image format is not supported.
/// Image format not recognised.
/// Image contains invalid content.
/// A representing the asynchronous operation.
- public static Task<Image> LoadAsync(Stream stream, IImageDecoder decoder)
- => LoadAsync(Configuration.Default, stream, decoder);
+ public static Task<Image> LoadAsync(Stream stream, IImageDecoder decoder, CancellationToken cancellationToken = default)
+ => LoadAsync(Configuration.Default, stream, decoder, cancellationToken);
///
/// Decode a new instance of the class from the given stream.
@@ -388,15 +395,16 @@ namespace SixLabors.ImageSharp
/// Create a new instance of the class from the given stream.
///
/// The stream containing image information.
+ /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// The stream is null.
/// The stream is not readable or the image format is not supported.
/// Image format not recognised.
/// Image contains invalid content.
/// The pixel format.
/// A representing the asynchronous operation.
- public static Task<Image<TPixel>> LoadAsync<TPixel>(Stream stream)
+ public static Task<Image<TPixel>> LoadAsync<TPixel>(Stream stream, CancellationToken cancellationToken = default)
where TPixel : unmanaged, IPixel<TPixel>
- => LoadAsync<TPixel>(Configuration.Default, stream);
+ => LoadAsync<TPixel>(Configuration.Default, stream, cancellationToken);
///
/// Create a new instance of the class from the given stream.
@@ -417,15 +425,16 @@ namespace SixLabors.ImageSharp
/// Create a new instance of the class from the given stream.
///
/// The stream containing image information.
+ /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
/// The stream is null.
/// The stream is not readable or the image format is not supported.
/// Image format not recognised.
/// Image contains invalid content.
/// The pixel format.
/// A representing the asynchronous operation.
- public static async Task<(Image Image, IImageFormat Format)> LoadWithFormatAsync(Stream stream)
+ public static async Task<(Image Image, IImageFormat Format)> LoadWithFormatAsync(Stream stream, CancellationToken cancellationToken = default)
where TPixel : unmanaged, IPixel
- => await LoadWithFormatAsync(Configuration.Default, stream).ConfigureAwait(false);
+ => await LoadWithFormatAsync(Configuration.Default, stream, cancellationToken).ConfigureAwait(false);
///
/// Create a new instance of the class from the given stream.
diff --git a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
index 62e23c1cdf..684d7791bf 100644
--- a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
+++ b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
@@ -10,6 +10,17 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
[Trait("Format", "Webp")]
public class LosslessUtilsTests
{
+ private static void RunCombinedShannonEntropyTest()
+ {
+ int[] x = { 3, 5, 2, 5, 3, 1, 2, 2, 3, 3, 1, 2, 1, 2, 1, 1, 0, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 1, 1, 0, 0, 2, 1, 1, 0, 3, 1, 2, 3, 2, 3 };
+ int[] y = { 11, 12, 8, 3, 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 2, 1, 1, 2, 4, 6, 4 };
+ float expected = 884.7585f;
+
+ float actual = LosslessUtils.CombinedShannonEntropy(x, y);
+
+ Assert.Equal(expected, actual, 5);
+ }
+
private static void RunSubtractGreenTest()
{
uint[] pixelData =
@@ -193,6 +204,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
}
}
+ [Fact]
+ public void CombinedShannonEntropy_Works() => RunCombinedShannonEntropyTest(); // calls the shared scenario directly, without going through FeatureTestRunner
+
[Fact]
public void Predictor11_Works() => RunPredictor11Test();
@@ -216,6 +230,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
#if SUPPORTS_RUNTIME_INTRINSICS
+ [Fact]
+ public void CombinedShannonEntropy_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCombinedShannonEntropyTest, HwIntrinsics.AllowAll); // same scenario with HwIntrinsics.AllowAll
+
+ [Fact]
+ public void CombinedShannonEntropy_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCombinedShannonEntropyTest, HwIntrinsics.DisableAVX2); // same scenario with DisableAVX2; presumably takes the branch not guarded by Avx2.IsSupported (confirm FeatureTestRunner semantics)
+
[Fact]
public void Predictor11_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.AllowAll);
@@ -238,19 +258,19 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll);
[Fact]
- public void SubtractGreen_WithoutAvx_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.DisableAVX);
+ public void SubtractGreen_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.DisableAVX2); // test name and flag both move from AVX to AVX2
[Fact]
- public void SubtractGreen_WithoutAvxOrSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSSE3);
+ public void SubtractGreen_WithoutAvxOrSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSSE3); // NOTE(review): name still says "Avx" although the flag is now DisableAVX2; sibling tests were renamed to "AVX2"
[Fact]
public void AddGreenToBlueAndRed_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.AllowAll);
[Fact]
- public void AddGreenToBlueAndRed_WithoutAvx_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.DisableAVX);
+ public void AddGreenToBlueAndRed_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.DisableAVX2); // test name and flag both move from AVX to AVX2
[Fact]
- public void AddGreenToBlueAndRed_WithoutAvxOrSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE2 | HwIntrinsics.DisableSSSE3);
+ public void AddGreenToBlueAndRed_WithoutAVX2OrSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE2 | HwIntrinsics.DisableSSSE3); // NOTE(review): also disables SSE2, which the test name does not mention
[Fact]
public void TransformColor_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.AllowAll);
diff --git a/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs b/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs
index 6bcb4f21f4..245e1cdc11 100644
--- a/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs
+++ b/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs
@@ -11,6 +11,57 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
[Trait("Format", "Webp")]
public class Vp8EncodingTests
{
+ private static void RunFTransform2Test()
+ {
+ // arrange: fixed source and reference byte buffers, two zeroed 16-element outputs, and the values each output is expected to hold
+ byte[] src = { 154, 154, 151, 151, 149, 148, 151, 157, 163, 163, 154, 132, 102, 98, 104, 108, 107, 104, 104, 103, 101, 106, 123, 119, 170, 171, 172, 171, 168, 175, 171, 173, 151, 151, 149, 150, 147, 147, 146, 159, 164, 165, 154, 129, 92, 90, 101, 105, 104, 103, 104, 101, 100, 105, 123, 117, 172, 172, 172, 168, 170, 177, 170, 175, 151, 149, 150, 150, 147, 147, 156, 161, 161, 161, 151, 126, 93, 90, 102, 107, 104, 103, 104, 101, 104, 104, 122, 117, 172, 172, 170, 168, 170, 177, 172, 175, 150, 149, 152, 151, 148, 151, 160, 159, 157, 157, 148, 133, 96, 90, 103, 107, 104, 104, 101, 100, 102, 102, 121, 117, 170, 170, 169, 171, 171, 179, 173, 175 };
+ byte[] reference = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129 };
+ short[] actualOutput1 = new short[16];
+ short[] actualOutput2 = new short[16];
+ short[] expectedOutput1 = { 182, 4, 1, 1, 6, 7, -1, -4, 5, 0, -2, 1, 2, 1, 1, 1 };
+ short[] expectedOutput2 = { 192, -34, 10, 1, -11, 8, 10, -7, 6, 3, -8, 4, 5, -3, -2, 6 };
+
+ // act: a single call receives both output arrays; the trailing int[16] is presumably scratch space (confirm against Vp8Encoding.FTransform2)
+ Vp8Encoding.FTransform2(src, reference, actualOutput1, actualOutput2, new int[16]);
+
+ // assert: each output must equal its expected sequence element for element
+ Assert.True(expectedOutput1.SequenceEqual(actualOutput1));
+ Assert.True(expectedOutput2.SequenceEqual(actualOutput2));
+ }
+
+ private static void RunFTransformTest()
+ {
+ // arrange: fixed source and reference byte buffers, one zeroed 16-element output, and the values it is expected to hold
+ byte[] src =
+ {
+ 154, 154, 151, 151, 149, 148, 151, 157, 163, 163, 154, 132, 102, 98, 104, 108, 107, 104, 104, 103,
+ 101, 106, 123, 119, 170, 171, 172, 171, 168, 175, 171, 173, 151, 151, 149, 150, 147, 147, 146, 159,
+ 164, 165, 154, 129, 92, 90, 101, 105, 104, 103, 104, 101, 100, 105, 123, 117, 172, 172, 172, 168,
+ 170, 177, 170, 175, 151, 149, 150, 150, 147, 147, 156, 161, 161, 161, 151, 126, 93, 90, 102, 107,
+ 104, 103, 104, 101, 104, 104, 122, 117, 172, 172, 170, 168, 170, 177, 172, 175, 150, 149, 152, 151,
+ 148, 151, 160, 159, 157, 157, 148, 133, 96, 90, 103, 107, 104, 104, 101, 100, 102, 102, 121, 117,
+ 170, 170, 169, 171, 171, 179, 173, 175
+ };
+ byte[] reference =
+ {
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
+ 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129,
+ 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129,
+ 129, 129, 129, 129, 129, 129, 129, 129
+ };
+ short[] actualOutput = new short[16];
+ short[] expectedOutput = { 182, 4, 1, 1, 6, 7, -1, -4, 5, 0, -2, 1, 2, 1, 1, 1 };
+
+ // act: the trailing int[16] is presumably scratch space (confirm against Vp8Encoding.FTransform)
+ Vp8Encoding.FTransform(src, reference, actualOutput, new int[16]);
+
+ // assert: the output must equal the expected sequence element for element
+ Assert.True(expectedOutput.SequenceEqual(actualOutput));
+ }
+
private static void RunOneInverseTransformTest()
{
// arrange
@@ -75,6 +126,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
Assert.True(dst.SequenceEqual(expected));
}
+ [Fact]
+ public void FTransform2_Works() => RunFTransform2Test(); // calls the shared scenario directly, without going through FeatureTestRunner
+
+ [Fact]
+ public void FTransform_Works() => RunFTransformTest(); // calls the shared scenario directly, without going through FeatureTestRunner
+
[Fact]
public void OneInverseTransform_Works() => RunOneInverseTransformTest();
@@ -82,6 +139,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
public void TwoInverseTransform_Works() => RunTwoInverseTransformTest();
#if SUPPORTS_RUNTIME_INTRINSICS
+ [Fact]
+ public void FTransform2_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunFTransform2Test, HwIntrinsics.AllowAll); // FTransform2 scenario with HwIntrinsics.AllowAll
+
+ [Fact]
+ public void FTransform2_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunFTransform2Test, HwIntrinsics.DisableHWIntrinsic); // FTransform2 scenario with HwIntrinsics.DisableHWIntrinsic
+
+ [Fact]
+ public void FTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunFTransformTest, HwIntrinsics.AllowAll); // FTransform scenario with HwIntrinsics.AllowAll
+
+ [Fact]
+ public void FTransform_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunFTransformTest, HwIntrinsics.DisableHWIntrinsic); // FTransform scenario with HwIntrinsics.DisableHWIntrinsic
+
[Fact]
public void OneInverseTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunOneInverseTransformTest, HwIntrinsics.AllowAll);