Browse Source

420 converter fixes

pull/1632/head
Dmitry Pentin 5 years ago
parent
commit
953095f1b9
  1. 10
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
  2. 35
      src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs

10
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs

@ -125,7 +125,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{ {
var unzig = ZigZag.CreateUnzigTable(); var unzig = ZigZag.CreateUnzigTable();
var pixelConverter = YCbCrForwardConverter444<TPixel>.Create(); var pixelConverter = YCbCrForwardConverter420<TPixel>.Create();
// ReSharper disable once InconsistentNaming // ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
@ -138,23 +138,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
cancellationToken.ThrowIfCancellationRequested(); cancellationToken.ThrowIfCancellationRequested();
for (int x = 0; x < pixels.Width; x += 16) for (int x = 0; x < pixels.Width; x += 16)
{ {
for(int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
{ {
int yOff = i * 8; int yOff = i * 8;
currentRows.Update(pixelBuffer, y + yOff); currentRows.Update(pixelBuffer, y + yOff);
pixelConverter.Convert420(frame, x, y, ref currentRows, i); pixelConverter.Convert(frame, x, y, ref currentRows, i);
prevDCY = this.WriteBlock( prevDCY = this.WriteBlock(
QuantIndex.Luminance, QuantIndex.Luminance,
prevDCY, prevDCY,
ref pixelConverter.twinBlocksY[0], ref pixelConverter.YLeft,
ref luminanceQuantTable, ref luminanceQuantTable,
ref unzig); ref unzig);
prevDCY = this.WriteBlock( prevDCY = this.WriteBlock(
QuantIndex.Luminance, QuantIndex.Luminance,
prevDCY, prevDCY,
ref pixelConverter.twinBlocksY[1], ref pixelConverter.YRight,
ref luminanceQuantTable, ref luminanceQuantTable,
ref unzig); ref unzig);
} }

35
src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs

@ -28,6 +28,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
} }
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
// TODO: documentation
public const int AvxRegisterRgbCompatibilityOffset = 8;
private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[] private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
{ {
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0,
@ -205,7 +208,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
/// <summary> /// <summary>
/// Converts 16x8 Rgb24 pixels matrix to 2 Y 8x8 matrices with 4:2:0 subsampling /// Converts 16x8 Rgb24 pixels matrix to 2 Y 8x8 matrices with 4:2:0 subsampling
/// </summary> /// </summary>
public static void Convert420_16x8(ReadOnlySpan<Rgb24> rgbSpan, Span<Block8x8F> yBlocks, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) public static void Convert420_16x8(ReadOnlySpan<Rgb24> rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row)
{ {
Debug.Assert(IsSupported, "AVX2 is required to run this converter"); Debug.Assert(IsSupported, "AVX2 is required to run this converter");
@ -241,7 +244,33 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
// 16x2 => 8x1 // 16x2 => 8x1
for (int j = 0; j < 4; j++) // left 8x8 column conversions
for (int j = 0; j < 4; j += 2)
{
rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte();
rgb = Avx2.Shuffle(rgb, extractRgbMask);
rg = Avx2.UnpackLow(rgb, zero);
bx = Avx2.UnpackHigh(rgb, zero);
r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());
int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1);
// (0.299F * r) + (0.587F * g) + (0.114F * b);
Unsafe.Add(ref yBlockLeft.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);
rDataLanes[j] = r;
gDataLanes[j] = g;
bDataLanes[j] = b;
}
// 16x2 => 8x1
// right 8x8 column conversions
for (int j = 1; j < 4; j += 2)
{ {
rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte();
@ -257,7 +286,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1); int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1);
// (0.299F * r) + (0.587F * g) + (0.114F * b); // (0.299F * r) + (0.587F * g) + (0.114F * b);
Unsafe.Add(ref yBlocks[j & 1].V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); Unsafe.Add(ref yBlockRight.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);
rDataLanes[j] = r; rDataLanes[j] = r;
gDataLanes[j] = g; gDataLanes[j] = g;

Loading…
Cancel
Save