diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 94ad343c8..c202ad4a8 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -128,66 +128,57 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless if (Avx2.IsSupported) { int numPixels = pixelData.Length; - fixed (uint* p = pixelData) + int i; + for (i = 0; i + 8 <= numPixels; i += 8) { - int i; - for (i = 0; i + 8 <= numPixels; i += 8) - { - uint* idx = p + i; - Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte(); - Vector256 in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2); - Vector256 output = Avx2.Add(input, in0g0g); - Avx.Store((byte*)idx, output); - } + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); + Vector256 input = Unsafe.As>(ref pos).AsByte(); + Vector256 in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2); + Vector256 output = Avx2.Add(input, in0g0g); + Unsafe.As>(ref pos) = output.AsUInt32(); + } - if (i != numPixels) - { - AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i)); - } + if (i != numPixels) + { + AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i)); } } else if (Ssse3.IsSupported) { int numPixels = pixelData.Length; - fixed (uint* p = pixelData) + int i; + for (i = 0; i + 4 <= numPixels; i += 4) { - int i; - for (i = 0; i + 4 <= numPixels; i += 4) - { - uint* idx = p + i; - Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte(); - Vector128 in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3); - Vector128 output = Sse2.Add(input, in0g0g); - Sse2.Store((byte*)idx, output.AsByte()); - } + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); + Vector128 input = Unsafe.As>(ref pos).AsByte(); + Vector128 in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3); + Vector128 output = Sse2.Add(input, in0g0g); + Unsafe.As>(ref pos) = output.AsUInt32(); + } - if (i != numPixels) - { - AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i)); - } + if (i != numPixels) + { + AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i)); } } else if (Sse2.IsSupported) { int numPixels = pixelData.Length; - fixed (uint* p = pixelData) + int i; + for (i = 0; i + 4 <= numPixels; i += 4) { - int i; - for (i = 0; i + 4 <= numPixels; i += 4) - { - uint* idx = p + i; - Vector128 input = Sse2.LoadVector128((ushort*)idx); - Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask); - Vector128 c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g - Vector128 output = Sse2.Add(input.AsByte(), c.AsByte()); - Sse2.Store((byte*)idx, output); - } + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); + Vector128 input = Unsafe.As>(ref pos).AsByte(); + Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g + Vector128 b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g + Vector128 output = Sse2.Add(input.AsByte(), c.AsByte()); + Unsafe.As>(ref pos) = output.AsUInt32(); + } - if (i != numPixels) - { - AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i)); - } + if (i != numPixels) + { + AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i)); } } else @@ -217,66 +208,57 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless if (Avx2.IsSupported) { int numPixels = pixelData.Length; - fixed (uint* p = pixelData) + int i; + for (i = 0; i + 8 <= numPixels; i += 8) { - int i; - for (i = 0; i + 8 <= numPixels; i += 8) - { - uint* idx = p + i; - Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte(); - Vector256 in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2); - Vector256 output = Avx2.Subtract(input, in0g0g); - Avx.Store((byte*)idx, output); - } + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); + Vector256 input = Unsafe.As>(ref pos).AsByte(); + Vector256 in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2); + Vector256 output = Avx2.Subtract(input, in0g0g); + Unsafe.As>(ref pos) = output.AsUInt32(); + } - if (i != numPixels) - { - SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i)); - } + if (i != numPixels) + { + SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i)); } } else if (Ssse3.IsSupported) { int numPixels = pixelData.Length; - fixed (uint* p = pixelData) + int i; + for (i = 0; i + 4 <= numPixels; i += 4) { - int i; - for (i = 0; i + 4 <= numPixels; i += 4) - { - uint* idx = p + i; - Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte(); - Vector128 in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3); - Vector128 output = Sse2.Subtract(input, in0g0g); - Sse2.Store((byte*)idx, output.AsByte()); - } + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); + Vector128 input = Unsafe.As>(ref pos).AsByte(); + Vector128 in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3); + Vector128 output = Sse2.Subtract(input, in0g0g); + Unsafe.As>(ref pos) = output.AsUInt32(); + } - if (i != numPixels) - { - SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i)); - } + if (i != numPixels) + { + SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i)); } } else if (Sse2.IsSupported) { int numPixels = pixelData.Length; - fixed (uint* p = pixelData) + int i; + for (i = 0; i + 4 <= numPixels; i += 4) { - int i; - for (i = 0; i + 4 <= numPixels; i += 4) - { - uint* idx = p + i; - Vector128 input = Sse2.LoadVector128((ushort*)idx); - Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask); - Vector128 c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g - Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte()); - Sse2.Store((byte*)idx, output); - } + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); + Vector128 input = Unsafe.As>(ref pos).AsByte(); + Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g + Vector128 b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g + Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte()); + Unsafe.As>(ref pos) = output.AsUInt32(); + } - if (i != numPixels) - { - SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i)); - } + if (i != numPixels) + { + SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i)); } } else @@ -409,75 +391,70 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// Color transform keeps the green (G) value as it is, transforms red (R) based on green and transforms blue (B) based on green and then based on red. /// /// The Vp8LMultipliers. - /// The pixel data to transform. + /// The pixel data to transform. /// The number of pixels to process. - public static void TransformColor(Vp8LMultipliers m, Span data, int numPixels) + public static void TransformColor(Vp8LMultipliers m, Span pixelData, int numPixels) { #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported && numPixels >= 8) { Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); - fixed (uint* src = data) + + int idx; + for (idx = 0; idx + 8 <= numPixels; idx += 8) { - int idx; - for (idx = 0; idx + 8 <= numPixels; idx += 8) - { - uint* pos = src + idx; - Vector256 input = Avx.LoadVector256(pos); - Vector256 a = Avx2.And(input.AsByte(), TransformColorAlphaGreenMask256); - Vector256 b = Avx2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask); - Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask); - Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); - Vector256 e = Avx2.ShiftLeftLogical(input.AsInt16(), 8); - Vector256 f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); - Vector256 g = Avx2.ShiftRightLogical(f.AsInt32(), 16); - Vector256 h = Avx2.Add(g.AsByte(), d.AsByte()); - Vector256 i = Avx2.And(h, TransformColorRedBlueMask256); - Vector256 output = Avx2.Subtract(input.AsByte(), i); - Avx.Store((byte*)pos, output); - } + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); + Vector256 input = Unsafe.As>(ref pos); + Vector256 a = Avx2.And(input.AsByte(), TransformColorAlphaGreenMask256); + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask); + Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); + Vector256 e = Avx2.ShiftLeftLogical(input.AsInt16(), 8); + Vector256 f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); + Vector256 g = Avx2.ShiftRightLogical(f.AsInt32(), 16); + Vector256 h = Avx2.Add(g.AsByte(), d.AsByte()); + Vector256 i = Avx2.And(h, TransformColorRedBlueMask256); + Vector256 output = Avx2.Subtract(input.AsByte(), i); + Unsafe.As>(ref pos) = output.AsUInt32(); + } - if (idx != numPixels) - { - TransformColorNoneVectorized(m, data.Slice(idx), numPixels - idx); - } + if (idx != numPixels) + { + TransformColorNoneVectorized(m, pixelData.Slice(idx), numPixels - idx); } } else if (Sse2.IsSupported) { Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); - fixed (uint* src = data) + int idx; + for (idx = 0; idx + 4 <= numPixels; idx += 4) { - int idx; - for (idx = 0; idx + 4 <= numPixels; idx += 4) - { - uint* pos = src + idx; - Vector128 input = Sse2.LoadVector128(pos); - Vector128 a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask); - Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); - Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); - Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); - Vector128 g = Sse2.ShiftRightLogical(f.AsInt32(), 16); - Vector128 h = Sse2.Add(g.AsByte(), d.AsByte()); - Vector128 i = Sse2.And(h, TransformColorRedBlueMask); - Vector128 output = Sse2.Subtract(input.AsByte(), i); - Sse2.Store((byte*)pos, output); - } + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); + Vector128 input = Unsafe.As>(ref pos); + Vector128 a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask); + Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); + Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); + Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); + Vector128 g = Sse2.ShiftRightLogical(f.AsInt32(), 16); + Vector128 h = Sse2.Add(g.AsByte(), d.AsByte()); + Vector128 i = Sse2.And(h, TransformColorRedBlueMask); + Vector128 output = Sse2.Subtract(input.AsByte(), i); + Unsafe.As>(ref pos) = output.AsUInt32(); + } - if (idx != numPixels) - { - TransformColorNoneVectorized(m, data.Slice(idx), numPixels - idx); - } + if (idx != numPixels) + { + TransformColorNoneVectorized(m, pixelData.Slice(idx), numPixels - idx); } } else #endif { - TransformColorNoneVectorized(m, data, numPixels); + TransformColorNoneVectorized(m, pixelData, numPixels); } } @@ -511,62 +488,57 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); - fixed (uint* src = pixelData) + int idx; + for (idx = 0; idx + 8 <= pixelData.Length; idx += 8) { - int idx; - for (idx = 0; idx + 8 <= pixelData.Length; idx += 8) - { - uint* pos = src + idx; - Vector256 input = Avx.LoadVector256(pos); - Vector256 a = Avx2.And(input.AsByte(), TransformColorInverseAlphaGreenMask256); - Vector256 b = Avx2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask); - Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask); - Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); - Vector256 e = Avx2.Add(input.AsByte(), d.AsByte()); - Vector256 f = Avx2.ShiftLeftLogical(e.AsInt16(), 8); - Vector256 g = Avx2.MultiplyHigh(f, multsb2.AsInt16()); - Vector256 h = Avx2.ShiftRightLogical(g.AsInt32(), 8); - Vector256 i = Avx2.Add(h.AsByte(), f.AsByte()); - Vector256 j = Avx2.ShiftRightLogical(i.AsInt16(), 8); - Vector256 output = Avx2.Or(j.AsByte(), a); - Avx.Store((byte*)pos, output); - } + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); + Vector256 input = Unsafe.As>(ref pos); + Vector256 a = Avx2.And(input.AsByte(), TransformColorInverseAlphaGreenMask256); + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask); + Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); + Vector256 e = Avx2.Add(input.AsByte(), d.AsByte()); + Vector256 f = Avx2.ShiftLeftLogical(e.AsInt16(), 8); + Vector256 g = Avx2.MultiplyHigh(f, multsb2.AsInt16()); + Vector256 h = Avx2.ShiftRightLogical(g.AsInt32(), 8); + Vector256 i = Avx2.Add(h.AsByte(), f.AsByte()); + Vector256 j = Avx2.ShiftRightLogical(i.AsInt16(), 8); + Vector256 output = Avx2.Or(j.AsByte(), a); + Unsafe.As>(ref pos) = output.AsUInt32(); + } - if (idx != pixelData.Length) - { - TransformColorInverseNoneVectorized(m, pixelData.Slice(idx)); - } + if (idx != pixelData.Length) + { + TransformColorInverseNoneVectorized(m, pixelData.Slice(idx)); } } else if (Sse2.IsSupported) { Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); - fixed (uint* src = pixelData) + + int idx; + for (idx = 0; idx + 4 <= pixelData.Length; idx += 4) { - int idx; - for (idx = 0; idx + 4 <= pixelData.Length; idx += 4) - { - uint* pos = src + idx; - Vector128 input = Sse2.LoadVector128(pos); - Vector128 a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask); - Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); - Vector128 e = Sse2.Add(input.AsByte(), d.AsByte()); - Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); - Vector128 g = Sse2.MultiplyHigh(f, multsb2.AsInt16()); - Vector128 h = Sse2.ShiftRightLogical(g.AsInt32(), 8); - Vector128 i = Sse2.Add(h.AsByte(), f.AsByte()); - Vector128 j = Sse2.ShiftRightLogical(i.AsInt16(), 8); - Vector128 output = Sse2.Or(j.AsByte(), a); - Sse2.Store((byte*)pos, output); - } + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); + Vector128 input = Unsafe.As>(ref pos); + Vector128 a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask); + Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); + Vector128 e = Sse2.Add(input.AsByte(), d.AsByte()); + Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); + Vector128 g = Sse2.MultiplyHigh(f, multsb2.AsInt16()); + Vector128 h = Sse2.ShiftRightLogical(g.AsInt32(), 8); + Vector128 i = Sse2.Add(h.AsByte(), f.AsByte()); + Vector128 j = Sse2.ShiftRightLogical(i.AsInt16(), 8); + Vector128 output = Sse2.Or(j.AsByte(), a); + Unsafe.As>(ref pos) = output.AsUInt32(); + } - if (idx != pixelData.Length) - { - TransformColorInverseNoneVectorized(m, pixelData.Slice(idx)); - } + if (idx != pixelData.Length) + { + TransformColorInverseNoneVectorized(m, pixelData.Slice(idx)); } } else @@ -885,15 +857,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int correction = (int)((23 * (origV & (y - 1))) >> 4); return (vF * (WebpLookupTables.Log2Table[v] + logCnt)) + correction; } - else - { - return (float)(Log2Reciprocal * v * Math.Log(v)); - } + + return (float)(Log2Reciprocal * v * Math.Log(v)); } private static float FastLog2Slow(uint v) { Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); + if (v < ApproxLogWithCorrectionMax) { int logCnt = 0;