From abcbc4c48d6bce5543a45003742f98ccd0b7ef9d Mon Sep 17 00:00:00 2001
From: Brian Popow <popow@gmx.de>
Date: Thu, 11 Nov 2021 17:01:56 +0100
Subject: [PATCH] Fix issue: vectors need to be of type short

---
 .../Formats/Webp/Lossy/Vp8Encoding.cs           | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
index dab466b9a..6ec191baa 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
@@ -3,6 +3,7 @@
 
 using System;
 using System.Buffers.Binary;
+using System.Linq;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 #if SUPPORTS_RUNTIME_INTRINSICS
@@ -145,14 +146,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
                 // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
                 Vector128<short> c1 = Sse2.MultiplyHigh(in1.AsInt16(), K2.AsInt16());
                 Vector128<short> c2 = Sse2.MultiplyHigh(in3.AsInt16(), K1.AsInt16());
-                Vector128<long> c3 = Sse2.Subtract(in1, in3);
+                Vector128<short> c3 = Sse2.Subtract(in1.AsInt16(), in3.AsInt16());
                 Vector128<short> c4 = Sse2.Subtract(c1, c2);
                 Vector128<short> c = Sse2.Add(c3.AsInt16(), c4.AsInt16());
 
                 // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
                 Vector128<short> d1 = Sse2.MultiplyHigh(in1.AsInt16(), K1.AsInt16());
                 Vector128<short> d2 = Sse2.MultiplyHigh(in3.AsInt16(), K2.AsInt16());
-                Vector128<long> d3 = Sse2.Add(in1, in3);
+                Vector128<short> d3 = Sse2.Add(in1.AsInt16(), in3.AsInt16());
                 Vector128<short> d4 = Sse2.Add(d1, d2);
                 Vector128<short> d = Sse2.Add(d3.AsInt16(), d4.AsInt16());
 
@@ -174,14 +175,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
                 // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
                 c1 = Sse2.MultiplyHigh(t1.AsInt16(), K2);
                 c2 = Sse2.MultiplyHigh(t3.AsInt16(), K1);
-                c3 = Sse2.Subtract(t1, t3);
+                c3 = Sse2.Subtract(t1.AsInt16(), t3.AsInt16());
                 c4 = Sse2.Subtract(c1, c2);
                 c = Sse2.Add(c3.AsInt16(), c4);
 
                 // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
                 d1 = Sse2.MultiplyHigh(t1.AsInt16(), K1);
                 d2 = Sse2.MultiplyHigh(t3.AsInt16(), K2);
-                d3 = Sse2.Add(t1, t3);
+                d3 = Sse2.Add(t1.AsInt16(), t3.AsInt16());
                 d4 = Sse2.Add(d1, d2);
                 d = Sse2.Add(d3.AsInt16(), d4);
 
@@ -229,10 +230,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
                 ref3 = Sse2.UnpackLow(ref3, Vector128<byte>.Zero);
 
                 // Add the inverse transform(s).
-                Vector128<ushort> ref0InvAdded = Sse2.Add(ref0.AsUInt16(), t0.AsUInt16());
-                Vector128<ushort> ref1InvAdded = Sse2.Add(ref1.AsUInt16(), t1.AsUInt16());
-                Vector128<ushort> ref2InvAdded = Sse2.Add(ref2.AsUInt16(), t2.AsUInt16());
-                Vector128<ushort> ref3InvAdded = Sse2.Add(ref3.AsUInt16(), t3.AsUInt16());
+                Vector128<short> ref0InvAdded = Sse2.Add(ref0.AsInt16(), t0.AsInt16());
+                Vector128<short> ref1InvAdded = Sse2.Add(ref1.AsInt16(), t1.AsInt16());
+                Vector128<short> ref2InvAdded = Sse2.Add(ref2.AsInt16(), t2.AsInt16());
+                Vector128<short> ref3InvAdded = Sse2.Add(ref3.AsInt16(), t3.AsInt16());
 
                 // Unsigned saturate to 8b.
                 ref0 = Sse2.PackUnsignedSaturate(ref0InvAdded.AsInt16(), ref0InvAdded.AsInt16());