|
|
|
@ -144,18 +144,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy |
|
|
|
Vector128<short> b = Sse2.Subtract(in0.AsInt16(), in2.AsInt16()); |
|
|
|
|
|
|
|
// c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
|
|
|
|
Vector128<short> c1 = Sse2.MultiplyHigh(in1.AsInt16(), K2.AsInt16()); |
|
|
|
Vector128<short> c2 = Sse2.MultiplyHigh(in3.AsInt16(), K1.AsInt16()); |
|
|
|
Vector128<short> c1 = Sse2.MultiplyHigh(in1.AsInt16(), K2); |
|
|
|
Vector128<short> c2 = Sse2.MultiplyHigh(in3.AsInt16(), K1); |
|
|
|
Vector128<short> c3 = Sse2.Subtract(in1.AsInt16(), in3.AsInt16()); |
|
|
|
Vector128<short> c4 = Sse2.Subtract(c1, c2); |
|
|
|
Vector128<short> c = Sse2.Add(c3.AsInt16(), c4.AsInt16()); |
|
|
|
Vector128<short> c = Sse2.Add(c3, c4); |
|
|
|
|
|
|
|
// d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
|
|
|
|
Vector128<short> d1 = Sse2.MultiplyHigh(in1.AsInt16(), K1.AsInt16()); |
|
|
|
Vector128<short> d2 = Sse2.MultiplyHigh(in3.AsInt16(), K2.AsInt16()); |
|
|
|
Vector128<short> d1 = Sse2.MultiplyHigh(in1.AsInt16(), K1); |
|
|
|
Vector128<short> d2 = Sse2.MultiplyHigh(in3.AsInt16(), K2); |
|
|
|
Vector128<short> d3 = Sse2.Add(in1.AsInt16(), in3.AsInt16()); |
|
|
|
Vector128<short> d4 = Sse2.Add(d1, d2); |
|
|
|
Vector128<short> d = Sse2.Add(d3.AsInt16(), d4.AsInt16()); |
|
|
|
Vector128<short> d = Sse2.Add(d3, d4); |
|
|
|
|
|
|
|
// Second pass.
|
|
|
|
Vector128<short> tmp0 = Sse2.Add(a, d); |
|
|
|
@ -177,14 +177,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy |
|
|
|
c2 = Sse2.MultiplyHigh(t3.AsInt16(), K1); |
|
|
|
c3 = Sse2.Subtract(t1.AsInt16(), t3.AsInt16()); |
|
|
|
c4 = Sse2.Subtract(c1, c2); |
|
|
|
c = Sse2.Add(c3.AsInt16(), c4); |
|
|
|
c = Sse2.Add(c3, c4); |
|
|
|
|
|
|
|
// d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
|
|
|
|
d1 = Sse2.MultiplyHigh(t1.AsInt16(), K1); |
|
|
|
d2 = Sse2.MultiplyHigh(t3.AsInt16(), K2); |
|
|
|
d3 = Sse2.Add(t1.AsInt16(), t3.AsInt16()); |
|
|
|
d4 = Sse2.Add(d1, d2); |
|
|
|
d = Sse2.Add(d3.AsInt16(), d4); |
|
|
|
d = Sse2.Add(d3, d4); |
|
|
|
|
|
|
|
// Second pass.
|
|
|
|
tmp0 = Sse2.Add(a, d); |
|
|
|
@ -236,10 +236,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy |
|
|
|
Vector128<short> ref3InvAdded = Sse2.Add(ref3.AsInt16(), t3.AsInt16()); |
|
|
|
|
|
|
|
// Unsigned saturate to 8b.
|
|
|
|
ref0 = Sse2.PackUnsignedSaturate(ref0InvAdded.AsInt16(), ref0InvAdded.AsInt16()); |
|
|
|
ref1 = Sse2.PackUnsignedSaturate(ref1InvAdded.AsInt16(), ref1InvAdded.AsInt16()); |
|
|
|
ref2 = Sse2.PackUnsignedSaturate(ref2InvAdded.AsInt16(), ref2InvAdded.AsInt16()); |
|
|
|
ref3 = Sse2.PackUnsignedSaturate(ref3InvAdded.AsInt16(), ref3InvAdded.AsInt16()); |
|
|
|
ref0 = Sse2.PackUnsignedSaturate(ref0InvAdded, ref0InvAdded); |
|
|
|
ref1 = Sse2.PackUnsignedSaturate(ref1InvAdded, ref1InvAdded); |
|
|
|
ref2 = Sse2.PackUnsignedSaturate(ref2InvAdded, ref2InvAdded); |
|
|
|
ref3 = Sse2.PackUnsignedSaturate(ref3InvAdded, ref3InvAdded); |
|
|
|
|
|
|
|
// Unsigned saturate to 8b.
|
|
|
|
if (doTwo) |
|
|
|
|