Browse Source

Add SSE2 version of TransformColor

pull/1552/head
Brian Popow 5 years ago
parent
commit
0676e68bfd
  1. 98
      src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs
  2. 13
      src/ImageSharp/Formats/WebP/Lossless/PredictorEncoder.cs
  3. 36
      tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs
  4. 50
      tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs

98
src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs

@ -391,6 +391,49 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
/// <summary>
/// Applies the color transform described by <paramref name="m"/> to the first
/// <paramref name="numPixels"/> entries of <paramref name="data"/>, in place.
/// When SSE2 is available the pixels are processed four at a time and any remaining
/// pixels are handed to <see cref="TransformColorNoneVectorized"/>; otherwise every
/// pixel is handled by <see cref="TransformColorNoneVectorized"/>.
/// </summary>
/// <param name="m">The green-to-red, green-to-blue and red-to-blue multipliers.</param>
/// <param name="data">The pixel data, modified in place.</param>
/// <param name="numPixels">The number of pixels to transform.</param>
public static void TransformColor(Vp8LMultipliers m, Span<uint> data, int numPixels)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Sse2.IsSupported)
    {
        // Per 32-bit lane: the upper 16 bits hold the green-to-red constant, the lower 16 bits the green-to-blue constant.
        Vector128<short> greenMultipliers = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)).AsInt16();

        // Per 32-bit lane: the upper 16 bits hold the red-to-blue constant, the lower 16 bits are zero.
        Vector128<short> redMultipliers = MkCst16(Cst5b(m.RedToBlue), 0).AsInt16();

        // Byte masks selecting bytes 1 and 3 (green, alpha) respectively bytes 0 and 2 (blue, red) of every pixel.
        Vector128<byte> alphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
        Vector128<byte> redBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);

        // NOTE(review): presumably encodes the control byte like _MM_SHUFFLE(2, 2, 0, 0),
        // i.e. 16-bit lanes 0 and 2 are each duplicated into their upper neighbour - confirm against SimdUtils.
        var shuffleControl = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);

        int idx = 0;
        fixed (uint* src = data)
        {
            while (idx + 4 <= numPixels)
            {
                uint* pos = src + idx;
                Vector128<uint> input = Sse2.LoadVector128(pos);

                // Keep only the alpha and green bytes of each pixel.
                Vector128<byte> alphaGreen = Sse2.And(input.AsByte(), alphaGreenMask);

                // Spread the (green << 8) half-word over both 16-bit halves of each pixel.
                Vector128<short> greenLow = Sse2.ShuffleLow(alphaGreen.AsInt16(), shuffleControl);
                Vector128<short> green = Sse2.ShuffleHigh(greenLow, shuffleControl);

                // High 16 bits of the signed products: green contribution for the red and blue positions.
                Vector128<short> greenDelta = Sse2.MultiplyHigh(green, greenMultipliers);

                // Move red and blue into the upper byte of their 16-bit halves.
                Vector128<short> redBlue = Sse2.ShiftLeftLogical(input.AsInt16(), 8);

                // Red contribution ends up in the upper 16 bits (lower multiplier is zero) ...
                Vector128<short> redDeltaHigh = Sse2.MultiplyHigh(redBlue, redMultipliers);

                // ... and is shifted down into the blue position.
                Vector128<int> redDelta = Sse2.ShiftRightLogical(redDeltaHigh.AsInt32(), 16);

                // Combine both contributions byte-wise and keep only the red and blue bytes.
                Vector128<byte> delta = Sse2.Add(redDelta.AsByte(), greenDelta.AsByte());
                Vector128<byte> redBlueDelta = Sse2.And(delta, redBlueMask);

                // Subtract the deltas; alpha and green bytes are left unchanged because their delta is zero.
                Vector128<byte> output = Sse2.Subtract(input.AsByte(), redBlueDelta);
                Sse2.Store((byte*)pos, output);

                idx += 4;
            }
        }

        // Finish the pixels that did not fill a whole vector with the scalar implementation.
        if (idx != numPixels)
        {
            TransformColorNoneVectorized(m, data.Slice(idx), numPixels - idx);
        }
    }
    else
#endif
    {
        TransformColorNoneVectorized(m, data, numPixels);
    }
}
public static void TransformColorNoneVectorized(Vp8LMultipliers m, Span<uint> data, int numPixels)
{
for (int i = 0; i < numPixels; i++)
{
uint argb = data[i];
@ -1140,6 +1183,33 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
}
}
/// <summary>
/// Computes the sampled size of <paramref name="size"/> when sampling with
/// <paramref name="samplingBits"/> bits, i.e. the size divided by 2^samplingBits, rounded up.
/// </summary>
/// <param name="size">The size to sub-sample.</param>
/// <param name="samplingBits">The number of sampling bits.</param>
/// <returns>The sub-sampled size.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static int SubSampleSize(int size, int samplingBits)
{
    int blockSize = 1 << samplingBits;
    int roundedUp = size + blockSize - 1;
    return roundedUp >> samplingBits;
}
/// <summary>
/// Adds two packed pixels component by component, each component sum taken mod 256.
/// </summary>
/// <param name="a">The first packed pixel.</param>
/// <param name="b">The second packed pixel.</param>
/// <returns>The packed pixel holding the per-component sums.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint AddPixels(uint a, uint b)
{
    const uint AlphaGreenMask = 0xff00ff00u;
    const uint RedBlueMask = 0x00ff00ffu;

    // Adding the two interleaved channel pairs separately keeps a carry out of one
    // component from reaching the neighbouring kept component; the final mask drops it.
    uint alphaGreenSum = ((a & AlphaGreenMask) + (b & AlphaGreenMask)) & AlphaGreenMask;
    uint redBlueSum = ((a & RedBlueMask) + (b & RedBlueMask)) & RedBlueMask;
    return alphaGreenSum | redBlueSum;
}
/// <summary>
/// Builds a sign-extended multiplying constant, pre-shifted by 5:
/// <paramref name="x"/> is shifted left by 8, truncated to a signed 16-bit value
/// and then arithmetically shifted right by 5.
/// </summary>
/// <param name="x">The multiplier value.</param>
/// <returns>The 16-bit constant.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static short Cst5b(int x)
{
    // The narrowing cast intentionally discards the upper bits and reinterprets bit 15 as the sign
    // (e.g. 240 becomes -128). Make that explicit so the method does not throw an
    // OverflowException when the assembly is compiled with overflow checking enabled.
    unchecked
    {
        short signExtended = (short)(x << 8);
        return (short)(signExtended >> 5);
    }
}
private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2)
{
int a = AddSubtractComponentFull(
@ -1186,6 +1256,14 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
return a < 256 ? a : ~a >> 24;
}
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Creates a vector whose 32-bit elements all hold <paramref name="hi"/> in their upper
/// 16 bits and the low 16 bits of <paramref name="lo"/> in their lower 16 bits.
/// </summary>
/// <param name="hi">The value placed in the upper 16 bits.</param>
/// <param name="lo">The value whose low 16 bits are placed in the lower 16 bits.</param>
/// <returns>The vector with the packed value in every element.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector128<int> MkCst16(int hi, int lo)
{
    int packed = (hi << 16) | (lo & 0xffff);
    return Vector128.Create(packed);
}
#endif
private static uint Select(uint a, uint b, uint c)
{
int paMinusPb =
@ -1222,26 +1300,6 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
return Average2(Average2(a0, a1), Average2(a2, a3));
}
/// <summary>
/// Computes sampled size of 'size' when sampling using 'sampling bits'.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static int SubSampleSize(int size, int samplingBits)
{
return (size + (1 << samplingBits) - 1) >> samplingBits;
}
/// <summary>
/// Sum of each component, mod 256.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint AddPixels(uint a, uint b)
{
uint alphaAndGreen = (a & 0xff00ff00u) + (b & 0xff00ff00u);
uint redAndBlue = (a & 0x00ff00ffu) + (b & 0x00ff00ffu);
return (alphaAndGreen & 0xff00ff00u) | (redAndBlue & 0x00ff00ffu);
}
[MethodImpl(InliningOptions.ShortMethod)]
private static uint GetArgbIndex(uint idx)
{

13
src/ImageSharp/Formats/WebP/Lossless/PredictorEncoder.cs

@ -720,6 +720,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static int MaxDiffBetweenPixels(uint p1, uint p2)
{
int diffA = Math.Abs((int)(p1 >> 24) - (int)(p2 >> 24));
@ -729,6 +730,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
return GetMax(GetMax(diffA, diffR), GetMax(diffG, diffB));
}
[MethodImpl(InliningOptions.ShortMethod)]
private static int MaxDiffAroundPixel(uint current, uint up, uint down, uint left, uint right)
{
int diffUp = MaxDiffBetweenPixels(current, up);
@ -738,6 +740,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
return GetMax(GetMax(diffUp, diffDown), GetMax(diffLeft, diffRight));
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void UpdateHisto(int[][] histoArgb, uint argb)
{
++histoArgb[0][argb >> 24];
@ -931,7 +934,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
{
var multsg = Vector128.Create((short)((greenToRed << 8) >> 5));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
var maskgreen = Vector128.Create(0x00ff00);
var mask = Vector128.Create((short)0xff);
@ -1002,11 +1005,11 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
{
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
var multsr = Vector128.Create((short)((redToBlue << 8) >> 5));
var multsg = Vector128.Create((short)((greenToBlue << 8) >> 5));
var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
var maskgreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
var maskgreenblue = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
Vector128<byte> maskblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
var maskblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
var shufflerLow = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
var shufflerHigh = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
@ -1084,6 +1087,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
return (float)retVal;
}
[MethodImpl(InliningOptions.ShortMethod)]
private static float PredictionCostCrossColor(int[] accumulated, int[] counts)
{
// Favor low entropy, locally and globally.
@ -1092,6 +1096,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
return LosslessUtils.CombinedShannonEntropy(counts, accumulated) + PredictionCostSpatial(counts, 3, expValue);
}
[MethodImpl(InliningOptions.ShortMethod)]
private static float PredictionCostSpatial(int[] counts, int weight0, double expVal)
{
int significantSymbols = 256 >> 4;

36
tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs

@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs
private string TestImageFullPath => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, this.TestImage);
[Params(TestImages.WebP.Peak)]
[Params(TestImages.Png.Bike)] // The bike image will have all 3 transforms as lossless webp.
public string TestImage { get; set; }
[GlobalSetup]
@ -74,7 +74,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs
});
}
/* Results 14.11.2020
/* Results 25.12.2020
* Summary *
BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.630 (2004/?/20H1)
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
@ -84,22 +84,22 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs
Job-GAIITM : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT
Job-HWOBSO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
| Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
|--------------------------- |----------- |-------------- |-------------- |----------:|----------:|----------:|------:|--------:|----------:|---------:|---------:|-----------:|
| 'Magick Webp Lossy' | Job-MYNMXL | .NET 4.7.2 | WebP/Peak.png | 1.744 ms | 0.0399 ms | 0.0022 ms | 0.35 | 0.00 | 1.9531 | - | - | 13.58 KB |
| 'ImageSharp Webp Lossy' | Job-MYNMXL | .NET 4.7.2 | WebP/Peak.png | 5.195 ms | 0.4241 ms | 0.0232 ms | 1.04 | 0.01 | 398.4375 | 93.7500 | - | 1661.83 KB |
| 'Magick Webp Lossless' | Job-MYNMXL | .NET 4.7.2 | WebP/Peak.png | 4.993 ms | 0.5097 ms | 0.0279 ms | 1.00 | 0.00 | 7.8125 | - | - | 35.7 KB |
| 'ImageSharp Webp Lossless' | Job-MYNMXL | .NET 4.7.2 | WebP/Peak.png | 12.174 ms | 1.2476 ms | 0.0684 ms | 2.44 | 0.02 | 1000.0000 | 984.3750 | 984.3750 | 8197.11 KB |
| | | | | | | | | | | | | |
| 'Magick Webp Lossy' | Job-MPXHSM | .NET Core 2.1 | WebP/Peak.png | 1.747 ms | 0.0581 ms | 0.0032 ms | 0.35 | 0.00 | 1.9531 | - | - | 13.34 KB |
| 'ImageSharp Webp Lossy' | Job-MPXHSM | .NET Core 2.1 | WebP/Peak.png | 3.527 ms | 0.0972 ms | 0.0053 ms | 0.71 | 0.00 | 402.3438 | 97.6563 | - | 1656.92 KB |
| 'Magick Webp Lossless' | Job-MPXHSM | .NET Core 2.1 | WebP/Peak.png | 5.001 ms | 0.4543 ms | 0.0249 ms | 1.00 | 0.00 | 7.8125 | - | - | 35.39 KB |
| 'ImageSharp Webp Lossless' | Job-MPXHSM | .NET Core 2.1 | WebP/Peak.png | 10.704 ms | 0.9844 ms | 0.0540 ms | 2.14 | 0.02 | 1000.0000 | 984.3750 | 984.3750 | 8182.6 KB |
| | | | | | | | | | | | | |
| 'Magick Webp Lossy' | Job-SYDSGM | .NET Core 3.1 | WebP/Peak.png | 1.742 ms | 0.0279 ms | 0.0015 ms | 0.35 | 0.01 | 1.9531 | - | - | 13.31 KB |
| 'ImageSharp Webp Lossy' | Job-SYDSGM | .NET Core 3.1 | WebP/Peak.png | 3.347 ms | 0.0638 ms | 0.0035 ms | 0.68 | 0.01 | 402.3438 | 97.6563 | - | 1656.93 KB |
| 'Magick Webp Lossless' | Job-SYDSGM | .NET Core 3.1 | WebP/Peak.png | 4.954 ms | 1.4131 ms | 0.0775 ms | 1.00 | 0.00 | 7.8125 | - | - | 35.35 KB |
| 'ImageSharp Webp Lossless' | Job-SYDSGM | .NET Core 3.1 | WebP/Peak.png | 10.737 ms | 2.5604 ms | 0.1403 ms | 2.17 | 0.05 | 1000.0000 | 984.3750 | 984.3750 | 8182.49 KB |
| Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
|--------------------------- |----------- |-------------- |------------- |----------:|-----------:|----------:|------:|--------:|-----------:|----------:|----------:|-------------:|
| 'Magick Webp Lossy' | Job-NTTOHF | .NET 4.7.2 | Png/Bike.png | 23.89 ms | 3.742 ms | 0.205 ms | 0.14 | 0.00 | - | - | - | 68.19 KB |
| 'ImageSharp Webp Lossy' | Job-NTTOHF | .NET 4.7.2 | Png/Bike.png | 72.27 ms | 20.228 ms | 1.109 ms | 0.43 | 0.01 | 6142.8571 | 142.8571 | - | 26360.05 KB |
| 'Magick Webp Lossless' | Job-NTTOHF | .NET 4.7.2 | Png/Bike.png | 167.75 ms | 41.847 ms | 2.294 ms | 1.00 | 0.00 | - | - | - | 520.28 KB |
| 'ImageSharp Webp Lossless' | Job-NTTOHF | .NET 4.7.2 | Png/Bike.png | 388.12 ms | 84.867 ms | 4.652 ms | 2.31 | 0.03 | 34000.0000 | 5000.0000 | 2000.0000 | 163174.2 KB |
| | | | | | | | | | | | | |
| 'Magick Webp Lossy' | Job-RXOYDK | .NET Core 2.1 | Png/Bike.png | 24.00 ms | 7.621 ms | 0.418 ms | 0.14 | 0.00 | - | - | - | 67.67 KB |
| 'ImageSharp Webp Lossy' | Job-RXOYDK | .NET Core 2.1 | Png/Bike.png | 47.77 ms | 6.498 ms | 0.356 ms | 0.29 | 0.00 | 6272.7273 | 272.7273 | 90.9091 | 26284.65 KB |
| 'Magick Webp Lossless' | Job-RXOYDK | .NET Core 2.1 | Png/Bike.png | 166.07 ms | 25.133 ms | 1.378 ms | 1.00 | 0.00 | - | - | - | 519.06 KB |
| 'ImageSharp Webp Lossless' | Job-RXOYDK | .NET Core 2.1 | Png/Bike.png | 356.60 ms | 249.912 ms | 13.699 ms | 2.15 | 0.10 | 34000.0000 | 5000.0000 | 2000.0000 | 162719.59 KB |
| | | | | | | | | | | | | |
| 'Magick Webp Lossy' | Job-UDPFDM | .NET Core 3.1 | Png/Bike.png | 23.95 ms | 5.531 ms | 0.303 ms | 0.14 | 0.00 | - | - | - | 67.57 KB |
| 'ImageSharp Webp Lossy' | Job-UDPFDM | .NET Core 3.1 | Png/Bike.png | 44.12 ms | 4.250 ms | 0.233 ms | 0.27 | 0.01 | 6250.0000 | 250.0000 | 83.3333 | 26284.72 KB |
| 'Magick Webp Lossless' | Job-UDPFDM | .NET Core 3.1 | Png/Bike.png | 165.94 ms | 66.670 ms | 3.654 ms | 1.00 | 0.00 | - | - | - | 523.05 KB |
| 'ImageSharp Webp Lossless' | Job-UDPFDM | .NET Core 3.1 | Png/Bike.png | 342.97 ms | 92.856 ms | 5.090 ms | 2.07 | 0.05 | 34000.0000 | 5000.0000 | 2000.0000 | 162725.32 KB |
*/
}
}

50
tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs

@ -68,6 +68,38 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP
Assert.Equal(expectedOutput, pixelData);
}
// Runs LosslessUtils.TransformColor over a fixed set of pixels and compares the
// in-place result with the expected pixel values.
private static void RunTransformColorTest()
{
    // Arrange
    uint[] pixels =
    {
        5998579, 65790, 130301, 16646653, 196350, 130565, 16712702, 16583164, 16452092, 65790, 782600,
        647446, 16571414, 16448771, 263931, 132601, 16711935, 131072, 511, 16711679, 132350, 329469,
        16647676, 132093, 66303, 16647169, 16515584, 196607, 196096, 16646655, 514, 131326, 16712192,
        327169, 16646655, 16776960, 3, 16712190, 511, 16646401, 16580612, 65535, 196092, 327425, 16319743,
        392450, 196861, 16712192, 16711680, 130564, 16451071
    };

    uint[] expected =
    {
        100279, 65790, 16710907, 16712190, 130813, 65028, 131840, 264449, 133377, 65790, 61697, 15917319,
        14801924, 16317698, 591614, 394748, 16711935, 131072, 65792, 16711679, 328704, 656896, 132607,
        328703, 197120, 66563, 16646657, 196607, 130815, 16711936, 131587, 131326, 66049, 261632, 16711936,
        16776960, 3, 511, 65792, 16711938, 16580612, 65535, 65019, 327425, 16516097, 261377, 196861, 66049,
        16711680, 65027, 16712962
    };

    var multipliers = new Vp8LMultipliers
    {
        GreenToBlue = 240,
        GreenToRed = 232,
        RedToBlue = 0
    };

    // Act: the transform modifies the pixel array in place.
    LosslessUtils.TransformColor(multipliers, pixels, pixels.Length);

    // Assert
    Assert.Equal(expected, pixels);
}
[Fact]
public void SubtractGreen_Works()
{
@ -80,6 +112,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP
RunAddGreenToBlueAndRedTest();
}
// Runs the color transform test with the default hardware feature set.
// NOTE(review): the method name is misspelled ("Trannsform"); it is kept as-is here so the test identity does not change.
[Fact]
public void TrannsformColor_Works() => RunTransformColorTest();
#if SUPPORTS_RUNTIME_INTRINSICS
[Fact]
public void SubtractGreen_WithHardwareIntrinsics_Works()
@ -116,6 +154,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP
{
FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE2 | HwIntrinsics.DisableSSSE3);
}
// Runs the color transform test through the feature test runner with all hardware intrinsics allowed.
[Fact]
public void TransformColor_WithHardwareIntrinsics_Works()
    => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.AllowAll);
// Runs the color transform test through the feature test runner with SSE2 disabled.
[Fact]
public void TransformColor_WithoutSSE2_Works()
    => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.DisableSSE2);
#endif
}
}

Loading…
Cancel
Save