Browse Source

Optimized AverageFilter

pull/1630/head
TechPizza 5 years ago
parent
commit
64e082615a
  1. 45
      src/ImageSharp/Formats/Png/Filters/AverageFilter.cs

45
src/ImageSharp/Formats/Png/Filters/AverageFilter.cs

@ -88,6 +88,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
if (Avx2.IsSupported)
{
Vector256<int> sumAccumulator = Vector256<int>.Zero;
Vector256<byte> allBitsSet = Avx2.CompareEqual(sumAccumulator, sumAccumulator).AsByte();
for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
{
@ -95,7 +96,9 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
Vector256<byte> left = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Vector256<byte> above = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
Vector256<byte> res = Avx2.Subtract(scan, Average(left, above));
Vector256<byte> avg = Avx2.Xor(Avx2.Average(Avx2.Xor(left, allBitsSet), Avx2.Xor(above, allBitsSet)), allBitsSet);
Vector256<byte> res = Avx2.Subtract(scan, avg);
Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
x += Vector256<byte>.Count;
@ -121,8 +124,8 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
}
else if (Sse2.IsSupported)
{
var allBitsSet = Vector128.Create((sbyte)-1);
Vector128<int> sumAccumulator = Vector128<int>.Zero;
Vector128<byte> allBitsSet = Sse2.CompareEqual(sumAccumulator, sumAccumulator).AsByte();
for (int xLeft = x - bytesPerPixel; x + Vector128<byte>.Count <= scanline.Length; xLeft += Vector128<byte>.Count)
{
@ -130,7 +133,9 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
Vector128<byte> left = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Vector128<byte> above = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
Vector128<byte> res = Sse2.Subtract(scan, Average(left, above));
Vector128<byte> avg = Sse2.Xor(Sse2.Average(Sse2.Xor(left, allBitsSet), Sse2.Xor(above, allBitsSet)), allBitsSet);
Vector128<byte> res = Sse2.Subtract(scan, avg);
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
x += Vector128<byte>.Count;
@ -142,7 +147,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
else
{
Vector128<sbyte> mask = Sse2.CompareGreaterThan(res.AsSByte(), Vector128<sbyte>.Zero);
mask = Sse2.Xor(mask, allBitsSet);
mask = Sse2.Xor(mask, allBitsSet.AsSByte());
absRes = Sse2.Xor(Sse2.Add(res.AsSByte(), mask), mask);
}
@ -189,37 +194,5 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
/// <returns>The <see cref="int"/></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int Average(byte left, byte above)
{
    // Both bytes are promoted to int before the addition, so the sum
    // (at most 510) cannot wrap; the shift then halves it, rounding down.
    int sum = left + above;
    return sum >> 1;
}
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Calculates the per-element average of two byte vectors, rounding down,
/// i.e. <c>(left + above) &gt;&gt; 1</c> for every pair of bytes.
/// </summary>
/// <param name="left">The first vector of bytes.</param>
/// <param name="above">The second vector of bytes.</param>
/// <returns>The <see cref="Vector128{Byte}"/> holding the rounded-down averages.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> Average(Vector128<byte> left, Vector128<byte> above)
{
    // Sse2.Average (PAVGB) computes (a + b + 1) >> 1, which rounds up.
    // Averaging the bitwise complements and complementing the result
    // turns the round-up into a round-down:
    //   ~avg(~a, ~b) == 255 - ((510 - (a + b) + 1) >> 1) == (a + b) >> 1
    // This avoids widening to 16 bit, adding, shifting and re-packing.
    Vector128<byte> allBitsSet = Vector128.Create(byte.MaxValue);
    Vector128<byte> invertedLeft = Sse2.Xor(left, allBitsSet);
    Vector128<byte> invertedAbove = Sse2.Xor(above, allBitsSet);
    Vector128<byte> roundedUp = Sse2.Average(invertedLeft, invertedAbove);
    return Sse2.Xor(roundedUp, allBitsSet);
}
/// <summary>
/// Calculates the per-element average of two byte vectors, rounding down,
/// i.e. <c>(left + above) &gt;&gt; 1</c> for every pair of bytes.
/// </summary>
/// <param name="left">The first vector of bytes.</param>
/// <param name="above">The second vector of bytes.</param>
/// <returns>The <see cref="Vector256{Byte}"/> holding the rounded-down averages.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<byte> Average(Vector256<byte> left, Vector256<byte> above)
{
    // Avx2.Average (VPAVGB) computes (a + b + 1) >> 1, which rounds up.
    // Averaging the bitwise complements and complementing the result
    // turns the round-up into a round-down:
    //   ~avg(~a, ~b) == 255 - ((510 - (a + b) + 1) >> 1) == (a + b) >> 1
    // This avoids widening to 16 bit, adding, shifting and re-packing.
    Vector256<byte> allBitsSet = Vector256.Create(byte.MaxValue);
    Vector256<byte> invertedLeft = Avx2.Xor(left, allBitsSet);
    Vector256<byte> invertedAbove = Avx2.Xor(above, allBitsSet);
    Vector256<byte> roundedUp = Avx2.Average(invertedLeft, invertedAbove);
    return Avx2.Xor(roundedUp, allBitsSet);
}
#endif
}
}

Loading…
Cancel
Save