Browse Source

Add SSE4.1 version of CollectColorRedTransforms

pull/1552/head
Brian Popow 5 years ago
parent
commit
92da90322e
  1. 90
      src/ImageSharp/Formats/WebP/Lossless/PredictorEncoder.cs
  2. 123
      tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs

90
src/ImageSharp/Formats/WebP/Lossless/PredictorEncoder.cs

@ -5,6 +5,11 @@ using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
{
/// <summary>
@ -84,7 +89,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
usedSubtractGreen);
}
public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span<uint> argb, Span<uint> image)
public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span<uint> bgra, Span<uint> image)
{
int maxTileSize = 1 << bits;
int tileXSize = LosslessUtils.SubSampleSize(width, bits);
@ -118,10 +123,10 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
height,
accumulatedRedHisto,
accumulatedBlueHisto,
argb);
bgra);
image[offset] = MultipliersToColorCode(prevX);
CopyTileWithColorTransform(width, height, tileXOffset, tileYOffset, maxTileSize, prevX, argb);
CopyTileWithColorTransform(width, height, tileXOffset, tileYOffset, maxTileSize, prevX, bgra);
// Gather accumulated histogram data.
for (int y = tileYOffset; y < allYMax; y++)
@ -131,13 +136,13 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
for (; ix < ixEnd; ix++)
{
uint pix = argb[ix];
if (ix >= 2 && pix == argb[ix - 2] && pix == argb[ix - 1])
uint pix = bgra[ix];
if (ix >= 2 && pix == bgra[ix - 2] && pix == bgra[ix - 1])
{
continue; // Repeated pixels are handled by backward references.
}
if (ix >= width + 2 && argb[ix - 2] == argb[ix - width - 2] && argb[ix - 1] == argb[ix - width - 1] && pix == argb[ix - width])
if (ix >= width + 2 && bgra[ix - 2] == bgra[ix - width - 2] && bgra[ix - 1] == bgra[ix - width - 1] && pix == bgra[ix - width])
{
continue; // Repeated pixels are handled by backward references.
}
@ -766,11 +771,11 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
}
}
private static Vp8LMultipliers GetBestColorTransformForTile(int tile_x, int tile_y, int bits, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int xSize, int ySize, int[] accumulatedRedHisto, int[] accumulatedBlueHisto, Span<uint> argb)
private static Vp8LMultipliers GetBestColorTransformForTile(int tileX, int tileY, int bits, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int xSize, int ySize, int[] accumulatedRedHisto, int[] accumulatedBlueHisto, Span<uint> argb)
{
int maxTileSize = 1 << bits;
int tileYOffset = tile_y * maxTileSize;
int tileXOffset = tile_x * maxTileSize;
int tileYOffset = tileY * maxTileSize;
int tileXOffset = tileX * maxTileSize;
int allXMax = GetMin(tileXOffset + maxTileSize, xSize);
int allYMax = GetMin(tileYOffset + maxTileSize, ySize);
int tileWidth = allXMax - tileXOffset;
@ -921,29 +926,84 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
return curDiff;
}
private static void CollectColorRedTransforms(Span<uint> argb, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo)
/// <summary>
/// Applies the green-to-red color transform to every pixel of a tile and counts the
/// resulting red values in <paramref name="histo"/>.
/// Uses an SSE4.1 code path handling eight pixels at a time when available and falls back to
/// <see cref="CollectColorRedTransformsNoneVectorized"/> otherwise (and for the pixels left over
/// at the end of each row).
/// </summary>
/// <param name="bgra">The pixel data, starting at the first pixel of the tile.</param>
/// <param name="stride">The number of pixels between the start of two consecutive rows.</param>
/// <param name="tileWidth">The width of the tile in pixels.</param>
/// <param name="tileHeight">The height of the tile in pixels.</param>
/// <param name="greenToRed">The green-to-red multiplier of the color transform.</param>
/// <param name="histo">The histogram to update; the entry at each transformed red value is incremented.</param>
private static void CollectColorRedTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Sse41.IsSupported)
    {
        // Wrap the shifted multiplier to 16 bits *before* the arithmetic right shift, so that
        // out-of-range multipliers wrap the same way as the (sbyte) cast of the scalar path does.
        short multiplier = (short)(unchecked((short)(greenToRed << 8)) >> 5);
        Vector128<short> multsg = Vector128.Create(multiplier);
        Vector128<byte> maskgreen = Vector128.Create(0x00ff00).AsByte();
        Vector128<byte> mask = Vector128.Create((short)0xff).AsByte();
        const int span = 8;
        Span<ushort> values = stackalloc ushort[span];
        for (int y = 0; y < tileHeight; ++y)
        {
            Span<uint> srcSpan = bgra.Slice(y * stride);
            fixed (uint* src = srcSpan)
            fixed (ushort* dst = values)
            {
                for (int x = 0; x + span <= tileWidth; x += span)
                {
                    // Load eight pixels as two vectors of four pixels each.
                    uint* input0Idx = src + x;
                    uint* input1Idx = src + x + (span / 2);
                    Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
                    Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
                    Vector128<byte> g0 = Sse2.And(input0, maskgreen); // 0 0 | g 0
                    Vector128<byte> g1 = Sse2.And(input1, maskgreen);
                    Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
                    Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
                    Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
                    Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
                    Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
                    Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
                    Vector128<byte> d = Sse2.And(c, mask); // 0 r'
                    Sse2.Store(dst, d.AsUInt16());

                    // Each stored value is masked to its low byte, so it is a valid red histogram index.
                    for (int i = 0; i < span; ++i)
                    {
                        ++histo[values[i]];
                    }
                }
            }
        }

        // The pixels at the end of each row which do not fill a whole vector are handled by the scalar version.
        int leftOver = tileWidth & (span - 1);
        if (leftOver > 0)
        {
            CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
        }
    }
    else
#endif
    {
        CollectColorRedTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToRed, histo);
    }
}
/// <summary>
/// Scalar version of the red histogram collection: applies
/// <see cref="LosslessUtils.TransformColorRed"/> to every pixel of a tile and increments
/// the entry of <paramref name="histo"/> at the returned index.
/// </summary>
/// <param name="bgra">The pixel data, starting at the first pixel of the tile.</param>
/// <param name="stride">The number of pixels between the start of two consecutive rows.</param>
/// <param name="tileWidth">The width of the tile in pixels.</param>
/// <param name="tileHeight">The height of the tile in pixels.</param>
/// <param name="greenToRed">The green-to-red multiplier; it is narrowed to <see cref="sbyte"/> before use.</param>
/// <param name="histo">The histogram to update.</param>
private static void CollectColorRedTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo)
{
    // Offset of the first pixel of the current tile row inside bgra.
    int pos = 0;
    while (tileHeight-- > 0)
    {
        for (int x = 0; x < tileWidth; x++)
        {
            int idx = LosslessUtils.TransformColorRed((sbyte)greenToRed, bgra[pos + x]);
            ++histo[idx];
        }

        pos += stride;
    }
}
private static void CollectColorBlueTransforms(Span<uint> argb, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo)
private static void CollectColorBlueTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo)
{
int pos = 0;
while (tileHeight-- > 0)
{
for (int x = 0; x < tileWidth; x++)
{
int idx = LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, argb[pos + x]);
int idx = LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, bgra[pos + x]);
++histo[idx];
}

123
tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs

@ -0,0 +1,123 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.IO;
using System.Runtime.CompilerServices;
using SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP
{
/// <summary>
/// Tests for <see cref="PredictorEncoder"/>: verifies that the color space transform produces
/// the same transform data with and without hardware intrinsics.
/// </summary>
[Trait("Format", "Webp")]
public class PredictorEncoderTests
{
    [Fact]
    public void ColorSpaceTransform_ProducesExpectedData()
    {
        RunColorSpaceTransformTest();
    }

#if SUPPORTS_RUNTIME_INTRINSICS
    [Fact]
    public void ColorSpaceTransform_WithHardwareIntrinsics_Works()
    {
        FeatureTestRunner.RunWithHwIntrinsicsFeature(ColorSpaceTransform_ProducesExpectedData, HwIntrinsics.AllowAll);
    }

    [Fact]
    public void ColorSpaceTransform_WithoutSSE41_Works()
    {
        FeatureTestRunner.RunWithHwIntrinsicsFeature(ColorSpaceTransform_ProducesExpectedData, HwIntrinsics.DisableSSE41);
    }
#endif

    /// <summary>
    /// Runs the color space transform on the "Peak" test image and compares the result
    /// against the expected transform data.
    /// </summary>
    private static void RunColorSpaceTransformTest()
    {
        // arrange
        uint[] expectedData =
        {
            4278191104, 4278191104, 4278191104, 4278191104, 4278191104, 4278191104, 4278191104, 4294577152,
            4294707200, 4294707200, 4294707200, 4294707200, 4294837248, 4294837248, 4293926912, 4294316544,
            4278191104, 4278191104, 4294837248, 4294837248, 4280287232, 4280350720, 4294447104, 4294707200,
            4294838272, 4278516736, 4294837248, 4294837248, 4278516736, 4294707200, 4279298048, 4294837248,
            4294837248, 4294837248, 4294837248, 4280287232, 4280287232, 4292670464, 4279633408, 4294838272,
            4294837248, 4278516736, 4278516736, 4278516736, 4278516736, 4278516736, 4278778880, 4278193152,
            4278191104, 4280287232, 4280287232, 4280287232, 4280287232, 4293971968, 4280612864, 4292802560,
            4294837760, 4278516736, 4278516736, 4294837760, 4294707712, 4278516736, 4294837248, 4278193152,
            4280287232, 4278984704, 4280287232, 4278243328, 4280287232, 4278244352, 4280287232, 4280025088,
            4280025088, 4294837760, 4278192128, 4294838784, 4294837760, 4294707712, 4278778880, 4278324224,
            4280287232, 4280287232, 4278202368, 4279115776, 4280287232, 4278243328, 4280287232, 4280287232,
            4280025088, 4280287232, 4278192128, 4294838272, 4294838272, 4294837760, 4278190592, 4278778880,
            4280875008, 4280287232, 4279896576, 4281075712, 4281075712, 4280287232, 4280287232, 4280287232,
            4280287232, 4280287232, 4278190592, 4294709248, 4278516736, 4278516736, 4278584832, 4278909440,
            4280287232, 4280287232, 4294367744, 4294621184, 4279115776, 4280287232, 4280287232, 4280351744,
            4280287232, 4280287232, 4280287232, 4278513664, 4278516736, 4278716416, 4278584832, 4280291328,
            4293062144, 4280287232, 4280287232, 4280287232, 4294456320, 4280291328, 4280287232, 4280287232,
            4280287232, 4280287232, 4280287232, 4280287232, 4278513152, 4278716416, 4278584832, 4280291328,
            4278198272, 4278198272, 4278589952, 4278198272, 4278198272, 4280287232, 4278765568, 4280287232,
            4280287232, 4280287232, 4280287232, 4294712832, 4278513152, 4278716640, 4279300608, 4278584832,
            4280156672, 4279373312, 4278589952, 4279373312, 4278328832, 4278328832, 4278328832, 4279634432,
            4280287232, 4280287232, 4280287232, 4280287232, 4278457344, 4280483328, 4278584832, 4278385664,
            4279634432, 4279373312, 4279634432, 4280287232, 4280287232, 4280156672, 4278589952, 4278328832,
            4278198272, 4280156672, 4280483328, 4294363648, 4280287232, 4278376448, 4280287232, 4278647808,
            4280287232, 4280287232, 4279373312, 4280287232, 4280287232, 4280156672, 4280287232, 4278198272,
            4278198272, 4280156672, 4280287232, 4280287232, 4293669888, 4278765568, 4278765568, 4280287232,
            4280287232, 4280287232, 4279634432, 4279634432, 4280287232, 4280287232, 4280287232, 4280287232,
            4280287232, 4280287232, 4280287232, 4280287232, 4279373312, 4279764992, 4293539328, 4279896576,
            4280287232, 4280287232, 4280287232, 4279634432, 4278198272, 4279634432, 4280287232, 4280287232,
            4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4279503872, 4279503872, 4280288256,
            4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232,
            4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232, 4280287232
        };

        // Convert image pixels to bgra array.
        // The image is loaded with an explicit pixel format so that TPixel of ToBgra can be inferred.
        byte[] imgBytes = File.ReadAllBytes(TestImageFullPath(TestImages.WebP.Peak));
        using var image = Image.Load<Rgba32>(imgBytes);
        uint[] bgra = ToBgra(image);

        int colorTransformBits = 3;
        int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits);
        int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits);
        var transformData = new uint[transformWidth * transformHeight];

        // act
        PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData);

        // assert
        Assert.Equal(expectedData, transformData);
    }

    /// <summary>
    /// Converts all pixels of the image, row by row, into an array of packed <see cref="Bgra32"/> values.
    /// </summary>
    /// <typeparam name="TPixel">The pixel format of the image.</typeparam>
    /// <param name="image">The image to convert.</param>
    /// <returns>An array with one packed pixel value per image pixel.</returns>
    private static uint[] ToBgra<TPixel>(Image<TPixel> image)
        where TPixel : unmanaged, IPixel<TPixel>
    {
        uint[] bgra = new uint[image.Width * image.Height];
        int idx = 0;
        for (int y = 0; y < image.Height; y++)
        {
            Span<TPixel> rowSpan = image.GetPixelRowSpan(y);
            for (int x = 0; x < rowSpan.Length; x++)
            {
                bgra[idx++] = ToBgra32(rowSpan[x]).PackedValue;
            }
        }

        return bgra;
    }

    /// <summary>
    /// Converts a single pixel to <see cref="Bgra32"/> by going through <see cref="Rgba32"/>.
    /// </summary>
    /// <typeparam name="TPixel">The pixel format of the color.</typeparam>
    /// <param name="color">The pixel to convert.</param>
    /// <returns>The converted pixel.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Bgra32 ToBgra32<TPixel>(TPixel color)
        where TPixel : unmanaged, IPixel<TPixel>
    {
        Rgba32 rgba = default;
        color.ToRgba32(ref rgba);
        var bgra = new Bgra32(rgba.R, rgba.G, rgba.B, rgba.A);
        return bgra;
    }

    /// <summary>
    /// Combines the test input images directory with the given relative image path.
    /// </summary>
    private static string TestImageFullPath(string path)
        => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, path);
}
}
Loading…
Cancel
Save