diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs new file mode 100644 index 000000000..4a8488f1b --- /dev/null +++ b/src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs @@ -0,0 +1,268 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif + +namespace SixLabors.ImageSharp.Formats.Webp.Lossless +{ + internal static class ColorSpaceTransformUtils + { +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector128 CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte(); + + private static readonly Vector128 CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte(); + + private static readonly Vector256 CollectColorRedTransformsGreenMask256 = Vector256.Create(0x00ff00).AsByte(); + + private static readonly Vector256 CollectColorRedTransformsAndMask256 = Vector256.Create((short)0xff).AsByte(); + + private static readonly Vector128 CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + private static readonly Vector128 CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); + + private static readonly Vector128 CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + + private static readonly Vector128 CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255); + + private static readonly Vector128 CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14); + + private static readonly Vector256 CollectColorBlueTransformsShuffleLowMask256 = Vector256.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30, 255, 255, 255, 255, 255, 255, 255, 255); + + private static readonly Vector256 CollectColorBlueTransformsShuffleHighMask256 = Vector256.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30); + + private static readonly Vector256 CollectColorBlueTransformsGreenBlueMask256 = Vector256.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); + + private static readonly Vector256 CollectColorBlueTransformsBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + + private static readonly Vector256 CollectColorBlueTransformsGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); +#endif + + public static void CollectColorBlueTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && tileWidth >= 16) + { + const int span = 16; + Span values = stackalloc ushort[span]; + var multsr = Vector256.Create(LosslessUtils.Cst5b(redToBlue)); + var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToBlue)); + for (int y = 0; y < tileHeight; y++) + { + Span srcSpan = bgra.Slice(y * stride); + ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan); + for (int x = 0; x + span <= tileWidth; x += span) + { + int input0Idx = x; + int input1Idx = x + (span / 2); + Vector256 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte(); + Vector256 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte(); + Vector256 r0 = Avx2.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask256); + Vector256 r1 = Avx2.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask256); + Vector256 r = Avx2.Or(r0, r1); + Vector256 gb0 = Avx2.And(input0, CollectColorBlueTransformsGreenBlueMask256); + Vector256 gb1 = Avx2.And(input1, CollectColorBlueTransformsGreenBlueMask256); + Vector256 gb = Avx2.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); + Vector256 g = Avx2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask256); + Vector256 a = Avx2.MultiplyHigh(r.AsInt16(), multsr); + Vector256 b = Avx2.MultiplyHigh(g.AsInt16(), multsg); + Vector256 c = Avx2.Subtract(gb.AsByte(), b.AsByte()); + Vector256 d = Avx2.Subtract(c, a.AsByte()); + Vector256 e = Avx2.And(d, CollectColorBlueTransformsBlueMask256); + + ref ushort outputRef = ref MemoryMarshal.GetReference(values); + Unsafe.As>(ref outputRef) = e.AsUInt16(); + + for (int i = 0; i < span; i++) + { + ++histo[values[i]]; + } + } + } + + int leftOver = tileWidth & (span - 1); + if (leftOver > 0) + { + CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo); + } + } + else if (Sse41.IsSupported) + { + const int span = 8; + Span values = stackalloc ushort[span]; + var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue)); + var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue)); + for (int y = 0; y < tileHeight; y++) + { + Span srcSpan = bgra.Slice(y * stride); + ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan); + for (int x = 0; x + span <= tileWidth; x += span) + { + int input0Idx = x; + int input1Idx = x + (span / 2); + Vector128 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte(); + Vector128 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte(); + Vector128 r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask); + Vector128 r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask); + Vector128 r = Sse2.Or(r0, r1); + Vector128 gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask); + Vector128 gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask); + Vector128 gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); + Vector128 g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask); + Vector128 a = Sse2.MultiplyHigh(r.AsInt16(), multsr); + Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); + Vector128 c = Sse2.Subtract(gb.AsByte(), b.AsByte()); + Vector128 d = Sse2.Subtract(c, a.AsByte()); + Vector128 e = Sse2.And(d, CollectColorBlueTransformsBlueMask); + + ref ushort outputRef = ref MemoryMarshal.GetReference(values); + Unsafe.As>(ref outputRef) = e.AsUInt16(); + + for (int i = 0; i < span; i++) + { + ++histo[values[i]]; + } + } + } + + int leftOver = tileWidth & (span - 1); + if (leftOver > 0) + { + CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo); + } + } + else +#endif + { + CollectColorBlueTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo); + } + } + + private static void CollectColorBlueTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo) + { + int pos = 0; + while (tileHeight-- > 0) + { + for (int x = 0; x < tileWidth; x++) + { + int idx = LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, bgra[pos + x]); + ++histo[idx]; + } + + pos += stride; + } + } + + public static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && tileWidth >= 16) + { + var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToRed)); + const int span = 16; + Span values = stackalloc ushort[span]; + for (int y = 0; y < tileHeight; y++) + { + Span srcSpan = bgra.Slice(y * stride); + ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan); + for (int x = 0; x + span <= tileWidth; x += span) + { + int input0Idx = x; + int input1Idx = x + (span / 2); + Vector256 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte(); + Vector256 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte(); + Vector256 g0 = Avx2.And(input0, CollectColorRedTransformsGreenMask256); // 0 0 | g 0 + Vector256 g1 = Avx2.And(input1, CollectColorRedTransformsGreenMask256); + Vector256 g = Avx2.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0 + Vector256 a0 = Avx2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r + Vector256 a1 = Avx2.ShiftRightLogical(input1.AsInt32(), 16); + Vector256 a = Avx2.PackUnsignedSaturate(a0, a1); // x r + Vector256 b = Avx2.MultiplyHigh(g.AsInt16(), multsg); // x dr + Vector256 c = Avx2.Subtract(a.AsByte(), b.AsByte()); // x r' + Vector256 d = Avx2.And(c, CollectColorRedTransformsAndMask256); // 0 r' + + ref ushort outputRef = ref MemoryMarshal.GetReference(values); + Unsafe.As>(ref outputRef) = d.AsUInt16(); + + for (int i = 0; i < span; i++) + { + ++histo[values[i]]; + } + } + } + + int leftOver = tileWidth & (span - 1); + if (leftOver > 0) + { + CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo); + } + } + else if (Sse41.IsSupported) + { + var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed)); + const int span = 8; + Span values = stackalloc ushort[span]; + for (int y = 0; y < tileHeight; y++) + { + Span srcSpan = bgra.Slice(y * stride); + ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan); + for (int x = 0; x + span <= tileWidth; x += span) + { + int input0Idx = x; + int input1Idx = x + (span / 2); + Vector128 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte(); + Vector128 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte(); + Vector128 g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0 + Vector128 g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask); + Vector128 g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0 + Vector128 a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r + Vector128 a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16); + Vector128 a = Sse41.PackUnsignedSaturate(a0, a1); // x r + Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr + Vector128 c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r' + Vector128 d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r' + + ref ushort outputRef = ref MemoryMarshal.GetReference(values); + Unsafe.As>(ref outputRef) = d.AsUInt16(); + + for (int i = 0; i < span; i++) + { + ++histo[values[i]]; + } + } + } + + int leftOver = tileWidth & (span - 1); + if (leftOver > 0) + { + CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo); + } + } + else +#endif + { + CollectColorRedTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToRed, histo); + } + } + + private static void CollectColorRedTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo) + { + int pos = 0; + while (tileHeight-- > 0) + { + for (int x = 0; x < tileWidth; x++) + { + int idx = LosslessUtils.TransformColorRed((sbyte)greenToRed, bgra[pos + x]); + ++histo[idx]; + } + + pos += stride; + } + } + } +} diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index 95c9065b3..1f7b284e9 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -5,11 +5,6 @@ using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -#endif - namespace SixLabors.ImageSharp.Formats.Webp.Lossless { /// @@ -34,37 +29,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private const int PredLowEffort = 11; -#if SUPPORTS_RUNTIME_INTRINSICS - private static readonly Vector128 CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte(); - - private static readonly Vector128 CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte(); - - private static readonly Vector256 CollectColorRedTransformsGreenMask256 = Vector256.Create(0x00ff00).AsByte(); - - private static readonly Vector256 CollectColorRedTransformsAndMask256 = Vector256.Create((short)0xff).AsByte(); - - private static readonly Vector128 CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - - private static readonly Vector128 CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); - - private static readonly Vector128 CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); - - private static readonly Vector128 CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255); - - private static readonly Vector128 CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14); - - private static readonly Vector256 CollectColorBlueTransformsShuffleLowMask256 = Vector256.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30, 255, 255, 255, 255, 255, 255, 255, 255); - - private static readonly Vector256 CollectColorBlueTransformsShuffleHighMask256 = Vector256.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30); - - private static readonly Vector256 CollectColorBlueTransformsGreenBlueMask256 = Vector256.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); - - private static readonly Vector256 CollectColorBlueTransformsBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); - - private static readonly Vector256 CollectColorBlueTransformsGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - -#endif - // This uses C#'s compiler optimization to refer to assembly's static data directly. private static ReadOnlySpan DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 }; @@ -993,7 +957,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Span histo = scratch.Slice(0, 256); histo.Clear(); - CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo); + ColorSpaceTransformUtils.CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo); double curDiff = PredictionCostCrossColor(accumulatedRedHisto, histo); if ((byte)greenToRed == prevX.GreenToRed) @@ -1031,7 +995,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Span histo = scratch.Slice(0, 256); histo.Clear(); - CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo); + ColorSpaceTransformUtils.CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo); double curDiff = PredictionCostCrossColor(accumulatedBlueHisto, histo); if ((byte)greenToBlue == prevX.GreenToBlue) { @@ -1070,228 +1034,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless return curDiff; } - private static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo) - { -#if SUPPORTS_RUNTIME_INTRINSICS - if (Avx2.IsSupported && tileWidth >= 16) - { - var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToRed)); - const int span = 16; - Span values = stackalloc ushort[span]; - for (int y = 0; y < tileHeight; y++) - { - Span srcSpan = bgra.Slice(y * stride); - ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan); - for (int x = 0; x + span <= tileWidth; x += span) - { - int input0Idx = x; - int input1Idx = x + (span / 2); - Vector256 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte(); - Vector256 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte(); - Vector256 g0 = Avx2.And(input0, CollectColorRedTransformsGreenMask256); // 0 0 | g 0 - Vector256 g1 = Avx2.And(input1, CollectColorRedTransformsGreenMask256); - Vector256 g = Avx2.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0 - Vector256 a0 = Avx2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r - Vector256 a1 = Avx2.ShiftRightLogical(input1.AsInt32(), 16); - Vector256 a = Avx2.PackUnsignedSaturate(a0, a1); // x r - Vector256 b = Avx2.MultiplyHigh(g.AsInt16(), multsg); // x dr - Vector256 c = Avx2.Subtract(a.AsByte(), b.AsByte()); // x r' - Vector256 d = Avx2.And(c, CollectColorRedTransformsAndMask256); // 0 r' - - ref ushort outputRef = ref MemoryMarshal.GetReference(values); - Unsafe.As>(ref outputRef) = d.AsUInt16(); - - for (int i = 0; i < span; i++) - { - ++histo[values[i]]; - } - } - } - - int leftOver = tileWidth & (span - 1); - if (leftOver > 0) - { - CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo); - } - } - else if (Sse41.IsSupported) - { - var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed)); - const int span = 8; - Span values = stackalloc ushort[span]; - for (int y = 0; y < tileHeight; y++) - { - Span srcSpan = bgra.Slice(y * stride); - ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan); - for (int x = 0; x + span <= tileWidth; x += span) - { - int input0Idx = x; - int input1Idx = x + (span / 2); - Vector128 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte(); - Vector128 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte(); - Vector128 g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0 - Vector128 g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask); - Vector128 g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0 - Vector128 a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r - Vector128 a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16); - Vector128 a = Sse41.PackUnsignedSaturate(a0, a1); // x r - Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr - Vector128 c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r' - Vector128 d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r' - - ref ushort outputRef = ref MemoryMarshal.GetReference(values); - Unsafe.As>(ref outputRef) = d.AsUInt16(); - - for (int i = 0; i < span; i++) - { - ++histo[values[i]]; - } - } - } - - int leftOver = tileWidth & (span - 1); - if (leftOver > 0) - { - CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo); - } - } - else -#endif - { - CollectColorRedTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToRed, histo); - } - } - - private static void CollectColorRedTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo) - { - int pos = 0; - while (tileHeight-- > 0) - { - for (int x = 0; x < tileWidth; x++) - { - int idx = LosslessUtils.TransformColorRed((sbyte)greenToRed, bgra[pos + x]); - ++histo[idx]; - } - - pos += stride; - } - } - - private static void CollectColorBlueTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo) - { -#if SUPPORTS_RUNTIME_INTRINSICS - if (Avx2.IsSupported && tileWidth >= 16) - { - const int span = 16; - Span values = stackalloc ushort[span]; - var multsr = Vector256.Create(LosslessUtils.Cst5b(redToBlue)); - var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToBlue)); - for (int y = 0; y < tileHeight; y++) - { - Span srcSpan = bgra.Slice(y * stride); - ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan); - for (int x = 0; x + span <= tileWidth; x += span) - { - int input0Idx = x; - int input1Idx = x + (span / 2); - Vector256 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte(); - Vector256 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte(); - Vector256 r0 = Avx2.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask256); - Vector256 r1 = Avx2.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask256); - Vector256 r = Avx2.Or(r0, r1); - Vector256 gb0 = Avx2.And(input0, CollectColorBlueTransformsGreenBlueMask256); - Vector256 gb1 = Avx2.And(input1, CollectColorBlueTransformsGreenBlueMask256); - Vector256 gb = Avx2.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); - Vector256 g = Avx2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask256); - Vector256 a = Avx2.MultiplyHigh(r.AsInt16(), multsr); - Vector256 b = Avx2.MultiplyHigh(g.AsInt16(), multsg); - Vector256 c = Avx2.Subtract(gb.AsByte(), b.AsByte()); - Vector256 d = Avx2.Subtract(c, a.AsByte()); - Vector256 e = Avx2.And(d, CollectColorBlueTransformsBlueMask256); - - ref ushort outputRef = ref MemoryMarshal.GetReference(values); - Unsafe.As>(ref outputRef) = e.AsUInt16(); - - for (int i = 0; i < span; i++) - { - ++histo[values[i]]; - } - } - } - - int leftOver = tileWidth & (span - 1); - if (leftOver > 0) - { - CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo); - } - } - else if (Sse41.IsSupported) - { - const int span = 8; - Span values = stackalloc ushort[span]; - var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue)); - var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue)); - for (int y = 0; y < tileHeight; y++) - { - Span srcSpan = bgra.Slice(y * stride); - ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan); - for (int x = 0; x + span <= tileWidth; x += span) - { - int input0Idx = x; - int input1Idx = x + (span / 2); - Vector128 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte(); - Vector128 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte(); - Vector128 r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask); - Vector128 r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask); - Vector128 r = Sse2.Or(r0, r1); - Vector128 gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask); - Vector128 gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask); - Vector128 gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); - Vector128 g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask); - Vector128 a = Sse2.MultiplyHigh(r.AsInt16(), multsr); - Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); - Vector128 c = Sse2.Subtract(gb.AsByte(), b.AsByte()); - Vector128 d = Sse2.Subtract(c, a.AsByte()); - Vector128 e = Sse2.And(d, CollectColorBlueTransformsBlueMask); - - ref ushort outputRef = ref MemoryMarshal.GetReference(values); - Unsafe.As>(ref outputRef) = e.AsUInt16(); - - for (int i = 0; i < span; i++) - { - ++histo[values[i]]; - } - } - } - - int leftOver = tileWidth & (span - 1); - if (leftOver > 0) - { - CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo); - } - } - else -#endif - { - CollectColorBlueTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo); - } - } - - private static void CollectColorBlueTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo) - { - int pos = 0; - while (tileHeight-- > 0) - { - for (int x = 0; x < tileWidth; x++) - { - int idx = LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, bgra[pos + x]); - ++histo[idx]; - } - - pos += stride; - } - } - private static float PredictionCostSpatialHistogram(int[][] accumulated, int[][] tile) { double retVal = 0.0d;