Browse Source

Merge branch 'master' into bp/upscalesse

pull/1836/head
Brian Popow 4 years ago
committed by GitHub
parent
commit
e6921e1779
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 268
      src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs
  2. 341
      src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
  3. 185
      src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
  4. 7
      tests/ImageSharp.Benchmarks/benchmark.sh
  5. 92
      tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs
  6. 6
      tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
  7. 6
      tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs

268
src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs

@ -0,0 +1,268 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
internal static class ColorSpaceTransformUtils
{
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector128<byte> CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte();
private static readonly Vector128<byte> CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte();
private static readonly Vector256<byte> CollectColorRedTransformsGreenMask256 = Vector256.Create(0x00ff00).AsByte();
private static readonly Vector256<byte> CollectColorRedTransformsAndMask256 = Vector256.Create((short)0xff).AsByte();
private static readonly Vector128<byte> CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector128<byte> CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
private static readonly Vector128<byte> CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
private static readonly Vector256<byte> CollectColorBlueTransformsShuffleLowMask256 = Vector256.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30, 255, 255, 255, 255, 255, 255, 255, 255);
private static readonly Vector256<byte> CollectColorBlueTransformsShuffleHighMask256 = Vector256.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30);
private static readonly Vector256<byte> CollectColorBlueTransformsGreenBlueMask256 = Vector256.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
private static readonly Vector256<byte> CollectColorBlueTransformsBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector256<byte> CollectColorBlueTransformsGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
#endif
public static void CollectColorBlueTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && tileWidth >= 16)
{
const int span = 16;
Span<ushort> values = stackalloc ushort[span];
var multsr = Vector256.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToBlue));
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector256<byte> input0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector256<byte> input1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector256<byte> r0 = Avx2.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask256);
Vector256<byte> r1 = Avx2.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask256);
Vector256<byte> r = Avx2.Or(r0, r1);
Vector256<byte> gb0 = Avx2.And(input0, CollectColorBlueTransformsGreenBlueMask256);
Vector256<byte> gb1 = Avx2.And(input1, CollectColorBlueTransformsGreenBlueMask256);
Vector256<ushort> gb = Avx2.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector256<byte> g = Avx2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask256);
Vector256<short> a = Avx2.MultiplyHigh(r.AsInt16(), multsr);
Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg);
Vector256<byte> c = Avx2.Subtract(gb.AsByte(), b.AsByte());
Vector256<byte> d = Avx2.Subtract(c, a.AsByte());
Vector256<byte> e = Avx2.And(d, CollectColorBlueTransformsBlueMask256);
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector256<ushort>>(ref outputRef) = e.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
}
}
else if (Sse41.IsSupported)
{
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector128<byte> input0 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector128<byte> input1 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector128<byte> r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
Vector128<byte> r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
Vector128<byte> r = Sse2.Or(r0, r1);
Vector128<byte> gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
Vector128<byte> gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector128<byte> g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask);
Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128<byte> d = Sse2.Subtract(c, a.AsByte());
Vector128<byte> e = Sse2.And(d, CollectColorBlueTransformsBlueMask);
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector128<ushort>>(ref outputRef) = e.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
}
}
else
#endif
{
CollectColorBlueTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
}
}
private static void CollectColorBlueTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{
int pos = 0;
while (tileHeight-- > 0)
{
for (int x = 0; x < tileWidth; x++)
{
int idx = LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, bgra[pos + x]);
++histo[idx];
}
pos += stride;
}
}
public static void CollectColorRedTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && tileWidth >= 16)
{
var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToRed));
const int span = 16;
Span<ushort> values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector256<byte> input0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector256<byte> input1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector256<byte> g0 = Avx2.And(input0, CollectColorRedTransformsGreenMask256); // 0 0 | g 0
Vector256<byte> g1 = Avx2.And(input1, CollectColorRedTransformsGreenMask256);
Vector256<ushort> g = Avx2.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector256<int> a0 = Avx2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector256<int> a1 = Avx2.ShiftRightLogical(input1.AsInt32(), 16);
Vector256<ushort> a = Avx2.PackUnsignedSaturate(a0, a1); // x r
Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector256<byte> c = Avx2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector256<byte> d = Avx2.And(c, CollectColorRedTransformsAndMask256); // 0 r'
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector256<ushort>>(ref outputRef) = d.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
}
}
else if (Sse41.IsSupported)
{
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (nint x = 0; x <= tileWidth - span; x += span)
{
nint input0Idx = x;
nint input1Idx = x + (span / 2);
Vector128<byte> input0 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector128<byte> input1 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector128<byte> g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
Vector128<byte> g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector128<byte> d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector128<ushort>>(ref outputRef) = d.AsUInt16();
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
}
}
else
#endif
{
CollectColorRedTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToRed, histo);
}
}
private static void CollectColorRedTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{
int pos = 0;
while (tileHeight-- > 0)
{
for (int x = 0; x < tileWidth; x++)
{
int idx = LosslessUtils.TransformColorRed((sbyte)greenToRed, bgra[pos + x]);
++histo[idx];
}
pos += stride;
}
}
}
}

341
src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs

@ -42,12 +42,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static readonly Vector128<byte> TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector256<byte> TransformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector128<byte> TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector256<byte> TransformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
private static readonly Vector128<byte> TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector256<byte> TransformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
#endif
@ -122,76 +128,67 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (Avx2.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 8; i += 8)
{
int i;
for (i = 0; i + 8 <= numPixels; i += 8)
{
uint* idx = p + i;
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Add(input, in0g0g);
Avx.Store((byte*)idx, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector256<byte> input = Unsafe.As<uint, Vector256<uint>>(ref pos).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Add(input, in0g0g);
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Ssse3.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{
int i;
for (i = 0; i + 4 <= numPixels; i += 4)
{
uint* idx = p + i;
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Add(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte());
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Add(input, in0g0g);
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Sse2.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{
int i;
for (i = 0; i + 4 <= numPixels; i += 4)
{
uint* idx = p + i;
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx);
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte());
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
AddGreenToBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
AddGreenToBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else
#endif
{
AddGreenToBlueAndRedNoneVectorized(pixelData);
AddGreenToBlueAndRedScalar(pixelData);
}
}
private static void AddGreenToBlueAndRedNoneVectorized(Span<uint> pixelData)
private static void AddGreenToBlueAndRedScalar(Span<uint> pixelData)
{
int numPixels = pixelData.Length;
for (int i = 0; i < numPixels; i++)
@ -211,76 +208,67 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (Avx2.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 8; i += 8)
{
int i;
for (i = 0; i + 8 <= numPixels; i += 8)
{
uint* idx = p + i;
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Subtract(input, in0g0g);
Avx.Store((byte*)idx, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector256<byte> input = Unsafe.As<uint, Vector256<uint>>(ref pos).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Subtract(input, in0g0g);
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Ssse3.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{
int i;
for (i = 0; i + 4 <= numPixels; i += 4)
{
uint* idx = p + i;
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Subtract(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte());
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Subtract(input, in0g0g);
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else if (Sse2.IsSupported)
{
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
nint i;
for (i = 0; i <= numPixels - 4; i += 4)
{
int i;
for (i = 0; i + 4 <= numPixels; i += 4)
{
uint* idx = p + i;
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx);
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte());
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedNoneVectorized(pixelData.Slice(i));
}
if (i != numPixels)
{
SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)i));
}
}
else
#endif
{
SubtractGreenFromBlueAndRedNoneVectorized(pixelData);
SubtractGreenFromBlueAndRedScalar(pixelData);
}
}
private static void SubtractGreenFromBlueAndRedNoneVectorized(Span<uint> pixelData)
private static void SubtractGreenFromBlueAndRedScalar(Span<uint> pixelData)
{
int numPixels = pixelData.Length;
for (int i = 0; i < numPixels; i++)
@ -403,49 +391,74 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Color transform keeps the green (G) value as it is, transforms red (R) based on green and transforms blue (B) based on green and then based on red.
/// </summary>
/// <param name="m">The Vp8LMultipliers.</param>
/// <param name="data">The pixel data to transform.</param>
/// <param name="pixelData">The pixel data to transform.</param>
/// <param name="numPixels">The number of pixels to process.</param>
public static void TransformColor(Vp8LMultipliers m, Span<uint> data, int numPixels)
public static void TransformColor(Vp8LMultipliers m, Span<uint> pixelData, int numPixels)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
if (Avx2.IsSupported && numPixels >= 8)
{
Vector256<int> multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector256<int> multsb2 = MkCst32(Cst5b(m.RedToBlue), 0);
nint idx;
for (idx = 0; idx <= numPixels - 8; idx += 8)
{
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
Vector256<uint> input = Unsafe.As<uint, Vector256<uint>>(ref pos);
Vector256<byte> a = Avx2.And(input.AsByte(), TransformColorAlphaGreenMask256);
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector256<short> d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector256<short> e = Avx2.ShiftLeftLogical(input.AsInt16(), 8);
Vector256<short> f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector256<int> g = Avx2.ShiftRightLogical(f.AsInt32(), 16);
Vector256<byte> h = Avx2.Add(g.AsByte(), d.AsByte());
Vector256<byte> i = Avx2.And(h, TransformColorRedBlueMask256);
Vector256<byte> output = Avx2.Subtract(input.AsByte(), i);
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
}
if (idx != numPixels)
{
TransformColorScalar(m, pixelData.Slice((int)idx), numPixels - (int)idx);
}
}
else if (Sse2.IsSupported)
{
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
fixed (uint* src = data)
nint idx;
for (idx = 0; idx <= numPixels - 4; idx += 4)
{
int idx;
for (idx = 0; idx + 4 <= numPixels; idx += 4)
{
uint* pos = src + idx;
Vector128<uint> input = Sse2.LoadVector128(pos);
Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8);
Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector128<int> g = Sse2.ShiftRightLogical(f.AsInt32(), 16);
Vector128<byte> h = Sse2.Add(g.AsByte(), d.AsByte());
Vector128<byte> i = Sse2.And(h, TransformColorRedBlueMask);
Vector128<byte> output = Sse2.Subtract(input.AsByte(), i);
Sse2.Store((byte*)pos, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
Vector128<uint> input = Unsafe.As<uint, Vector128<uint>>(ref pos);
Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8);
Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector128<int> g = Sse2.ShiftRightLogical(f.AsInt32(), 16);
Vector128<byte> h = Sse2.Add(g.AsByte(), d.AsByte());
Vector128<byte> i = Sse2.And(h, TransformColorRedBlueMask);
Vector128<byte> output = Sse2.Subtract(input.AsByte(), i);
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (idx != numPixels)
{
TransformColorNoneVectorized(m, data.Slice(idx), numPixels - idx);
}
if (idx != numPixels)
{
TransformColorScalar(m, pixelData.Slice((int)idx), numPixels - (int)idx);
}
}
else
#endif
{
TransformColorNoneVectorized(m, data, numPixels);
TransformColorScalar(m, pixelData, numPixels);
}
}
private static void TransformColorNoneVectorized(Vp8LMultipliers m, Span<uint> data, int numPixels)
private static void TransformColorScalar(Vp8LMultipliers m, Span<uint> data, int numPixels)
{
for (int i = 0; i < numPixels; i++)
{
@ -471,45 +484,71 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static void TransformColorInverse(Vp8LMultipliers m, Span<uint> pixelData)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
if (Avx2.IsSupported && pixelData.Length >= 8)
{
Vector256<int> multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector256<int> multsb2 = MkCst32(Cst5b(m.RedToBlue), 0);
nint idx;
for (idx = 0; idx <= pixelData.Length - 8; idx += 8)
{
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
Vector256<uint> input = Unsafe.As<uint, Vector256<uint>>(ref pos);
Vector256<byte> a = Avx2.And(input.AsByte(), TransformColorInverseAlphaGreenMask256);
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector256<short> d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector256<byte> e = Avx2.Add(input.AsByte(), d.AsByte());
Vector256<short> f = Avx2.ShiftLeftLogical(e.AsInt16(), 8);
Vector256<short> g = Avx2.MultiplyHigh(f, multsb2.AsInt16());
Vector256<int> h = Avx2.ShiftRightLogical(g.AsInt32(), 8);
Vector256<byte> i = Avx2.Add(h.AsByte(), f.AsByte());
Vector256<short> j = Avx2.ShiftRightLogical(i.AsInt16(), 8);
Vector256<byte> output = Avx2.Or(j.AsByte(), a);
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
}
if (idx != pixelData.Length)
{
TransformColorInverseScalar(m, pixelData.Slice((int)idx));
}
}
else if (Sse2.IsSupported)
{
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
fixed (uint* src = pixelData)
nint idx;
for (idx = 0; idx <= pixelData.Length - 4; idx += 4)
{
int idx;
for (idx = 0; idx + 4 <= pixelData.Length; idx += 4)
{
uint* pos = src + idx;
Vector128<uint> input = Sse2.LoadVector128(pos);
Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte());
Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8);
Vector128<short> g = Sse2.MultiplyHigh(f, multsb2.AsInt16());
Vector128<int> h = Sse2.ShiftRightLogical(g.AsInt32(), 8);
Vector128<byte> i = Sse2.Add(h.AsByte(), f.AsByte());
Vector128<short> j = Sse2.ShiftRightLogical(i.AsInt16(), 8);
Vector128<byte> output = Sse2.Or(j.AsByte(), a);
Sse2.Store((byte*)pos, output);
}
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
Vector128<uint> input = Unsafe.As<uint, Vector128<uint>>(ref pos);
Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte());
Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8);
Vector128<short> g = Sse2.MultiplyHigh(f, multsb2.AsInt16());
Vector128<int> h = Sse2.ShiftRightLogical(g.AsInt32(), 8);
Vector128<byte> i = Sse2.Add(h.AsByte(), f.AsByte());
Vector128<short> j = Sse2.ShiftRightLogical(i.AsInt16(), 8);
Vector128<byte> output = Sse2.Or(j.AsByte(), a);
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
}
if (idx != pixelData.Length)
{
TransformColorInverseNoneVectorized(m, pixelData.Slice(idx));
}
if (idx != pixelData.Length)
{
TransformColorInverseScalar(m, pixelData.Slice((int)idx));
}
}
else
#endif
{
TransformColorInverseNoneVectorized(m, pixelData);
TransformColorInverseScalar(m, pixelData);
}
}
private static void TransformColorInverseNoneVectorized(Vp8LMultipliers m, Span<uint> pixelData)
private static void TransformColorInverseScalar(Vp8LMultipliers m, Span<uint> pixelData)
{
for (int i = 0; i < pixelData.Length; i++)
{
@ -744,6 +783,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (float)retVal;
}
[MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorRed(sbyte greenToRed, uint argb)
{
sbyte green = U32ToS8(argb >> 8);
@ -752,6 +792,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (byte)(newRed & 0xff);
}
[MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint argb)
{
sbyte green = U32ToS8(argb >> 8);
@ -818,15 +859,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int correction = (int)((23 * (origV & (y - 1))) >> 4);
return (vF * (WebpLookupTables.Log2Table[v] + logCnt)) + correction;
}
else
{
return (float)(Log2Reciprocal * v * Math.Log(v));
}
return (float)(Log2Reciprocal * v * Math.Log(v));
}
private static float FastLog2Slow(uint v)
{
Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
if (v < ApproxLogWithCorrectionMax)
{
int logCnt = 0;
@ -1288,6 +1328,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector128<int> MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff));
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector256<int> MkCst32(int hi, int lo) => Vector256.Create((hi << 16) | (lo & 0xffff));
#endif
private static uint Select(uint a, uint b, uint c, Span<short> scratch)

185
src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

@ -5,11 +5,6 @@ using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
/// <summary>
@ -34,22 +29,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const int PredLowEffort = 11;
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector128<byte> CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte();
private static readonly Vector128<byte> CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte();
private static readonly Vector128<byte> CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector128<byte> CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
private static readonly Vector128<byte> CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
#endif
// This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<sbyte> DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 };
@ -572,19 +551,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (byte)lower;
}
else
{
// upper is closer to residual than lower.
if (residual <= boundaryResidual && upper > boundaryResidual)
{
// Halve quantization step to avoid crossing boundary. This midpoint is
// on the same side of boundary as residual because midpoint <= residual
// (since upper is closer than lower) and residual is below the boundary.
return (byte)(lower + (quantization >> 1));
}
return (byte)(upper & 0xff);
// upper is closer to residual than lower.
if (residual <= boundaryResidual && upper > boundaryResidual)
{
// Halve quantization step to avoid crossing boundary. This midpoint is
// on the same side of boundary as residual because midpoint <= residual
// (since upper is closer than lower) and residual is below the boundary.
return (byte)(lower + (quantization >> 1));
}
return (byte)upper;
}
/// <summary>
@ -980,7 +957,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span<int> histo = scratch.Slice(0, 256);
histo.Clear();
CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo);
ColorSpaceTransformUtils.CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo);
double curDiff = PredictionCostCrossColor(accumulatedRedHisto, histo);
if ((byte)greenToRed == prevX.GreenToRed)
@ -1018,7 +995,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span<int> histo = scratch.Slice(0, 256);
histo.Clear();
CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
ColorSpaceTransformUtils.CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
double curDiff = PredictionCostCrossColor(accumulatedBlueHisto, histo);
if ((byte)greenToBlue == prevX.GreenToBlue)
{
@ -1057,146 +1034,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return curDiff;
}
private static void CollectColorRedTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
{
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* src = srcSpan)
fixed (ushort* dst = values)
{
for (int x = 0; x + span <= tileWidth; x += span)
{
uint* input0Idx = src + x;
uint* input1Idx = src + x + (span / 2);
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
Vector128<byte> g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
Vector128<byte> g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector128<byte> d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
Sse2.Store(dst, d.AsUInt16());
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
}
#pragma warning restore SA1503 // Braces should not be omitted
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo);
}
}
else
#endif
{
CollectColorRedTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToRed, histo);
}
}
private static void CollectColorRedTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{
int pos = 0;
while (tileHeight-- > 0)
{
for (int x = 0; x < tileWidth; x++)
{
int idx = LosslessUtils.TransformColorRed((sbyte)greenToRed, bgra[pos + x]);
++histo[idx];
}
pos += stride;
}
}
private static void CollectColorBlueTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
{
const int span = 8;
Span<ushort> values = stackalloc ushort[span];
var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* src = srcSpan)
fixed (ushort* dst = values)
{
for (int x = 0; x + span <= tileWidth; x += span)
{
uint* input0Idx = src + x;
uint* input1Idx = src + x + (span / 2);
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
Vector128<byte> r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
Vector128<byte> r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
Vector128<byte> r = Sse2.Or(r0, r1);
Vector128<byte> gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
Vector128<byte> gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector128<byte> g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask);
Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128<byte> d = Sse2.Subtract(c, a.AsByte());
Vector128<byte> e = Sse2.And(d, CollectColorBlueTransformsBlueMask);
Sse2.Store(dst, e.AsUInt16());
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
}
#pragma warning restore SA1503 // Braces should not be omitted
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
}
}
else
#endif
{
CollectColorBlueTransformsNoneVectorized(bgra, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
}
}
private static void CollectColorBlueTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{
int pos = 0;
while (tileHeight-- > 0)
{
for (int x = 0; x < tileWidth; x++)
{
int idx = LosslessUtils.TransformColorBlue((sbyte)greenToBlue, (sbyte)redToBlue, bgra[pos + x]);
++histo[idx];
}
pos += stride;
}
}
private static float PredictionCostSpatialHistogram(int[][] accumulated, int[][] tile)
{
double retVal = 0.0d;

7
tests/ImageSharp.Benchmarks/benchmark.sh

@ -1,7 +0,0 @@
#!/bin/bash
# Build in release mode
dotnet build -c Release -f netcoreapp2.0
# Run benchmarks
dotnet bin/Release/netcoreapp2.0/ImageSharp.Benchmarks.dll

92
tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs

@ -0,0 +1,92 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using SixLabors.ImageSharp.Formats.Webp.Lossless;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.WebP
{
[Trait("Format", "Webp")]
public class ColorSpaceTransformUtilsTests
{
private static void RunCollectColorBlueTransformsTest()
{
uint[] pixelData =
{
3074, 256, 256, 256, 0, 65280, 65280, 65280, 256, 256, 0, 256, 0, 65280, 0, 65280, 16711680, 256,
256, 0, 65024, 0, 256, 256, 0, 65280, 0, 65280, 0, 256, 0, 256
};
int[] expectedOutput =
{
31, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
int[] histo = new int[256];
ColorSpaceTransformUtils.CollectColorBlueTransforms(pixelData, 0, 32, 1, 0, 0, histo);
Assert.Equal(expectedOutput, histo);
}
private static void RunCollectColorRedTransformsTest()
{
uint[] pixelData =
{
3074, 256, 256, 256, 0, 65280, 65280, 65280, 256, 256, 0, 256, 0, 65280, 0, 65280, 16711680, 256,
256, 0, 65024, 0, 256, 256, 0, 65280, 0, 65280, 0, 256, 0, 256
};
int[] expectedOutput =
{
31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
};
int[] histo = new int[256];
ColorSpaceTransformUtils.CollectColorRedTransforms(pixelData, 0, 32, 1, 0, histo);
Assert.Equal(expectedOutput, histo);
}
[Fact]
public void CollectColorBlueTransforms_Works() => RunCollectColorBlueTransformsTest();
[Fact]
public void CollectColorRedTransforms_Works() => RunCollectColorRedTransformsTest();
#if SUPPORTS_RUNTIME_INTRINSICS
[Fact]
public void CollectColorBlueTransforms_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorBlueTransformsTest, HwIntrinsics.AllowAll);
[Fact]
public void CollectColorBlueTransforms_WithoutSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorBlueTransformsTest, HwIntrinsics.DisableSSE41);
[Fact]
public void CollectColorBlueTransforms_WithoutAvx2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorBlueTransformsTest, HwIntrinsics.DisableAVX2);
[Fact]
public void CollectColorRedTransforms_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorRedTransformsTest, HwIntrinsics.AllowAll);
[Fact]
public void CollectColorRedTransforms_WithoutSSE41_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorRedTransformsTest, HwIntrinsics.DisableSSE41);
[Fact]
public void CollectColorRedTransforms_WithoutAvx2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCollectColorRedTransformsTest, HwIntrinsics.DisableAVX2);
#endif
}
}

6
tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs

@ -257,11 +257,17 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
[Fact]
public void TransformColor_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.DisableSSE2);
[Fact]
public void TransformColor_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.DisableAVX2);
[Fact]
public void TransformColorInverse_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorInverseTest, HwIntrinsics.AllowAll);
[Fact]
public void TransformColorInverse_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorInverseTest, HwIntrinsics.DisableSSE2);
[Fact]
public void TransformColorInverse_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorInverseTest, HwIntrinsics.DisableAVX2);
#endif
}
}

6
tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs

@ -40,8 +40,13 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
[Fact]
public void ColorSpaceTransform_WithBikeImage_WithoutSSE41_Works()
=> FeatureTestRunner.RunWithHwIntrinsicsFeature(ColorSpaceTransform_WithBikeImage_ProducesExpectedData, HwIntrinsics.DisableSSE41);
[Fact]
public void ColorSpaceTransform_WithBikeImage_WithoutAvx2_Works()
=> FeatureTestRunner.RunWithHwIntrinsicsFeature(ColorSpaceTransform_WithBikeImage_ProducesExpectedData, HwIntrinsics.DisableAVX2);
#endif
// Test image: Input\Webp\peak.png
private static void RunColorSpaceTransformTestWithPeakImage()
{
// arrange
@ -99,6 +104,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
Assert.Equal(expectedData, transformData);
}
// Test image: Input\Png\Bike.png
private static void RunColorSpaceTransformTestWithBikeImage()
{
// arrange

Loading…
Cancel
Save