mirror of https://github.com/SixLabors/ImageSharp
11 changed files with 1 additions and 1739 deletions
@ -1,35 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder |
|||
{ |
|||
/// <summary>
/// Names the four Huffman tables by their numeric index.
/// </summary>
internal enum HuffIndex
{
    /// <summary>
    /// Index of the luminance DC Huffman table.
    /// </summary>
    LuminanceDC = 0,

    // ReSharper disable UnusedMember.Local

    /// <summary>
    /// Index of the luminance AC Huffman table.
    /// </summary>
    LuminanceAC = 1,

    /// <summary>
    /// Index of the chrominance DC Huffman table.
    /// </summary>
    ChrominanceDC = 2,

    /// <summary>
    /// Index of the chrominance AC Huffman table.
    /// </summary>
    ChrominanceAC = 3,

    // ReSharper restore UnusedMember.Local
}
|||
} |
|||
@ -1,127 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Diagnostics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
#endif
|
|||
using SixLabors.ImageSharp.Advanced; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder |
|||
{ |
|||
/// <summary>
|
|||
/// On-stack worker struct to efficiently encapsulate the TPixel -> L8 -> Y conversion chain of 8x8 pixel blocks.
|
|||
/// </summary>
|
|||
/// <typeparam name="TPixel">The pixel type to work on</typeparam>
|
|||
internal ref struct LuminanceForwardConverter<TPixel> |
|||
where TPixel : unmanaged, IPixel<TPixel> |
|||
{ |
|||
/// <summary>
|
|||
/// Number of pixels processed per single <see cref="Convert(int, int, ref RowOctet{TPixel})"/> call
|
|||
/// </summary>
|
|||
private const int PixelsPerSample = 8 * 8; |
|||
|
|||
/// <summary>
|
|||
/// The Y component
|
|||
/// </summary>
|
|||
public Block8x8F Y; |
|||
|
|||
/// <summary>
|
|||
/// Temporary 64-pixel span to hold unconverted TPixel data.
|
|||
/// </summary>
|
|||
private readonly Span<TPixel> pixelSpan; |
|||
|
|||
/// <summary>
|
|||
/// Temporary 64-byte span to hold converted <see cref="L8"/> data.
|
|||
/// </summary>
|
|||
private readonly Span<L8> l8Span; |
|||
|
|||
/// <summary>
|
|||
/// Sampled pixel buffer size.
|
|||
/// </summary>
|
|||
private readonly Size samplingAreaSize; |
|||
|
|||
/// <summary>
|
|||
/// <see cref="Configuration"/> for internal operations.
|
|||
/// </summary>
|
|||
private readonly Configuration config; |
|||
|
|||
/// <summary>
/// Initializes a new instance of the <see cref="LuminanceForwardConverter{TPixel}"/> struct.
/// </summary>
/// <param name="frame">Frame whose width, height and configuration are captured for later conversions.</param>
public LuminanceForwardConverter(ImageFrame<TPixel> frame)
{
    // Result block starts zeroed.
    this.Y = default;

    // Scratch buffers, one entry per pixel of an 8x8 block.
    TPixel[] pixelBuffer = new TPixel[PixelsPerSample];
    L8[] l8Buffer = new L8[PixelsPerSample];
    this.pixelSpan = pixelBuffer.AsSpan();
    this.l8Span = l8Buffer.AsSpan();

    // Frame data.
    int frameWidth = frame.Width;
    int frameHeight = frame.Height;
    this.samplingAreaSize = new Size(frameWidth, frameHeight);
    this.config = frame.GetConfiguration();
}
|||
|
|||
/// <summary>
/// Gets the size of the area sampled per conversion call: a fixed 8x8 pixel block.
/// </summary>
private static Size SampleSize
{
    get { return new Size(8, 8); }
}
|||
|
|||
/// <summary>
/// Converts the 8x8 area at position (x, y) and stores the result in <see cref="Y"/>.
/// </summary>
/// <param name="x">Horizontal position of the area.</param>
/// <param name="y">Vertical position of the area.</param>
/// <param name="currentRows">Rows handed to <c>LoadAndStretchEdges</c> as the pixel source.</param>
public void Convert(int x, int y, ref RowOctet<TPixel> currentRows)
{
    // Step 1: fill the TPixel scratch span for the requested area.
    var origin = new Point(x, y);
    YCbCrForwardConverter<TPixel>.LoadAndStretchEdges(currentRows, this.pixelSpan, origin, SampleSize, this.samplingAreaSize);

    // Step 2: TPixel -> L8.
    PixelOperations<TPixel>.Instance.ToL8(this.config, this.pixelSpan, this.l8Span);

    // Step 3: L8 bytes -> floats in the Y block, vectorized when the hardware allows it.
    ref L8 firstL8 = ref MemoryMarshal.GetReference(this.l8Span);

    if (!RgbToYCbCrConverterVectorized.IsSupported)
    {
        ConvertScalar(ref firstL8, ref this.Y);
        return;
    }

    ConvertAvx(ref firstL8, ref this.Y);
}
|||
|
|||
/// <summary>
/// Converts 8x8 L8 pixel matrix to 8x8 Block of floats using Avx2 Intrinsics.
/// </summary>
/// <remarks>
/// Exactly 8 bytes are loaded per row, so the method never reads past the 64th byte of the source.
/// </remarks>
/// <param name="l8Start">Start of span of L8 pixels with size of 64</param>
/// <param name="yBlock">8x8 destination matrix of Luminance(Y) converted data</param>
private static void ConvertAvx(ref L8 l8Start, ref Block8x8F yBlock)
{
    Debug.Assert(RgbToYCbCrConverterVectorized.IsSupported, "AVX2 is required to run this converter");

#if SUPPORTS_RUNTIME_INTRINSICS
    ref byte l8Bytes = ref Unsafe.As<L8, byte>(ref l8Start);
    ref Vector256<float> destRef = ref yBlock.V0;

    const int bytesPerL8Stride = 8;
    for (nint i = 0; i < 8; i++)
    {
        // Load only the 8 bytes of this row. Dereferencing a Vector128<byte> here would read 16 bytes,
        // which for the last row (offset 56) runs 8 bytes past the end of the 64-byte source buffer.
        ulong rowBits = Unsafe.ReadUnaligned<ulong>(ref Unsafe.AddByteOffset(ref l8Bytes, bytesPerL8Stride * i));
        Vector128<byte> rowBytes = Vector128.CreateScalarUnsafe(rowBits).AsByte();

        // The widening conversion consumes just the low 8 bytes: byte -> int32 -> float.
        Unsafe.Add(ref destRef, i) = Avx2.ConvertToVector256Single(Avx2.ConvertToVector256Int32(rowBytes));
    }
#endif
}
|||
|
|||
/// <summary>
/// Scalar fallback: copies the packed value of each of the 64 L8 pixels into the float block.
/// </summary>
/// <param name="l8Start">Reference to the first of 64 consecutive L8 pixels.</param>
/// <param name="yBlock">Destination 8x8 block receiving the luminance (Y) values.</param>
private static void ConvertScalar(ref L8 l8Start, ref Block8x8F yBlock)
{
    int index = 0;
    while (index < Block8x8F.Size)
    {
        yBlock[index] = Unsafe.Add(ref l8Start, index).PackedValue;
        index++;
    }
}
|||
} |
|||
} |
|||
@ -1,165 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Diagnostics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
#endif
|
|||
using SixLabors.ImageSharp.Advanced; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder |
|||
{ |
|||
/// <summary>
|
|||
/// On-stack worker struct to convert TPixel -> Rgb24 of 8x8 pixel blocks.
|
|||
/// </summary>
|
|||
/// <typeparam name="TPixel">The pixel type to work on.</typeparam>
|
|||
internal ref struct RgbForwardConverter<TPixel> |
|||
where TPixel : unmanaged, IPixel<TPixel> |
|||
{ |
|||
/// <summary>
|
|||
/// Number of pixels processed per single <see cref="Convert(int, int, ref RowOctet{TPixel})"/> call
|
|||
/// </summary>
|
|||
private const int PixelsPerSample = 8 * 8; |
|||
|
|||
/// <summary>
|
|||
/// Total byte size of processed pixels converted from TPixel to <see cref="Rgb24"/>
|
|||
/// </summary>
|
|||
private const int RgbSpanByteSize = PixelsPerSample * 3; |
|||
|
|||
/// <summary>
|
|||
/// The Red component.
|
|||
/// </summary>
|
|||
public Block8x8F R; |
|||
|
|||
/// <summary>
|
|||
/// The Green component.
|
|||
/// </summary>
|
|||
public Block8x8F G; |
|||
|
|||
/// <summary>
|
|||
/// The Blue component.
|
|||
/// </summary>
|
|||
public Block8x8F B; |
|||
|
|||
/// <summary>
|
|||
/// Temporary 64-pixel span to hold unconverted TPixel data.
|
|||
/// </summary>
|
|||
private readonly Span<TPixel> pixelSpan; |
|||
|
|||
/// <summary>
|
|||
/// Temporary 64-pixel span to hold converted Rgb24 data.
|
|||
/// </summary>
|
|||
private readonly Span<Rgb24> rgbSpan; |
|||
|
|||
/// <summary>
|
|||
/// Sampled pixel buffer size.
|
|||
/// </summary>
|
|||
private readonly Size samplingAreaSize; |
|||
|
|||
/// <summary>
|
|||
/// <see cref="Configuration"/> for internal operations.
|
|||
/// </summary>
|
|||
private readonly Configuration config; |
|||
|
|||
/// <summary>
/// Initializes a new instance of the <see cref="RgbForwardConverter{TPixel}"/> struct.
/// </summary>
/// <param name="frame">Frame whose width, height and configuration are captured for later conversions.</param>
public RgbForwardConverter(ImageFrame<TPixel> frame)
{
    // Result blocks start zeroed.
    this.R = default;
    this.G = default;
    this.B = default;

    // Scratch buffers; the byte buffer behind the Rgb24 span is extended by the padding the vectorized path asks for.
    TPixel[] pixelBuffer = new TPixel[PixelsPerSample];
    this.pixelSpan = pixelBuffer.AsSpan();

    int rgbByteCount = RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxCompatibilityPadding;
    byte[] rgbBuffer = new byte[rgbByteCount];
    this.rgbSpan = MemoryMarshal.Cast<byte, Rgb24>(rgbBuffer.AsSpan());

    // Frame data.
    int frameWidth = frame.Width;
    int frameHeight = frame.Height;
    this.samplingAreaSize = new Size(frameWidth, frameHeight);
    this.config = frame.GetConfiguration();
}
|||
|
|||
/// <summary>
/// Gets the size of the area sampled per conversion call: a fixed 8x8 pixel block.
/// </summary>
private static Size SampleSize
{
    get { return new Size(8, 8); }
}
|||
|
|||
/// <summary>
/// Converts the 8x8 area at position (x, y) to Rgb24 and stores the channels in
/// <see cref="R"/>, <see cref="G"/> and <see cref="B"/>.
/// </summary>
/// <param name="x">Horizontal position of the area.</param>
/// <param name="y">Vertical position of the area.</param>
/// <param name="currentRows">Rows handed to <c>LoadAndStretchEdges</c> as the pixel source.</param>
public void Convert(int x, int y, ref RowOctet<TPixel> currentRows)
{
    // Step 1: fill the TPixel scratch span for the requested area.
    var origin = new Point(x, y);
    YCbCrForwardConverter<TPixel>.LoadAndStretchEdges(currentRows, this.pixelSpan, origin, SampleSize, this.samplingAreaSize);

    // Step 2: TPixel -> Rgb24.
    PixelOperations<TPixel>.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan);

    // Step 3: split the channels into float blocks, vectorized when the hardware allows it.
    if (!RgbToYCbCrConverterVectorized.IsSupported)
    {
        ConvertScalar(this.rgbSpan, ref this.R, ref this.G, ref this.B);
        return;
    }

    ConvertAvx(this.rgbSpan, ref this.R, ref this.G, ref this.B);
}
|||
|
|||
/// <summary>
/// Converts 8x8 RGB24 pixel matrix to 8x8 Block of floats using Avx2 Intrinsics.
/// </summary>
/// <remarks>
/// Every 24-byte row is loaded as a full 32-byte vector, so the memory behind <paramref name="rgbSpan"/>
/// must extend 8 bytes past the 64th pixel.
/// </remarks>
/// <param name="rgbSpan">Span of Rgb24 pixels with size of 64</param>
/// <param name="rBlock">8x8 destination matrix of Red converted data</param>
/// <param name="gBlock">8x8 destination matrix of Green converted data</param>
/// <param name="bBlock">8x8 destination matrix of Blue converted data</param>
private static void ConvertAvx(Span<Rgb24> rgbSpan, ref Block8x8F rBlock, ref Block8x8F gBlock, ref Block8x8F bBlock)
{
    Debug.Assert(RgbToYCbCrConverterVectorized.IsSupported, "AVX2 is required to run this converter");

#if SUPPORTS_RUNTIME_INTRINSICS
    ref Vector256<byte> rgbByteSpan = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));
    ref Vector256<float> redRef = ref rBlock.V0;
    ref Vector256<float> greenRef = ref gBlock.V0;
    ref Vector256<float> blueRef = ref bBlock.V0;
    Vector256<byte> zero = Vector256<byte>.Zero;

    var extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(RgbToYCbCrConverterVectorized.MoveFirst24BytesToSeparateLanes));
    var extractRgbMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(RgbToYCbCrConverterVectorized.ExtractRgb));
    Vector256<byte> rgb, rg, bx;

    const int bytesPerRgbStride = 24;
    for (nint i = 0; i < 8; i++)
    {
        // Put the first 12 bytes of the row (pixels 0-3) in the lower lane and the next 12 (pixels 4-7) in the upper lane.
        rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, bytesPerRgbStride * i).AsUInt32(), extractToLanesMask).AsByte();

        // Within each lane: RGBRGBRGBRGB -> RRRRGGGGBBBB followed by four zero bytes.
        rgb = Avx2.Shuffle(rgb, extractRgbMask);

        // Interleaving with zero twice widens every byte to a 32-bit integer.
        rg = Avx2.UnpackLow(rgb, zero);
        bx = Avx2.UnpackHigh(rgb, zero);

        Unsafe.Add(ref redRef, i) = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
        Unsafe.Add(ref greenRef, i) = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
        Unsafe.Add(ref blueRef, i) = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());
    }
#endif
}
|||
|
|||
/// <summary>
/// Scalar fallback: copies the R, G and B bytes of each of the 64 pixels into the matching float block.
/// </summary>
/// <param name="rgbSpan">Span whose first 64 Rgb24 pixels are read.</param>
/// <param name="redBlock">Destination 8x8 block for the red channel.</param>
/// <param name="greenBlock">Destination 8x8 block for the green channel.</param>
/// <param name="blueBlock">Destination 8x8 block for the blue channel.</param>
private static void ConvertScalar(Span<Rgb24> rgbSpan, ref Block8x8F redBlock, ref Block8x8F greenBlock, ref Block8x8F blueBlock)
{
    ref Rgb24 firstPixel = ref MemoryMarshal.GetReference(rgbSpan);

    int index = 0;
    while (index < Block8x8F.Size)
    {
        Rgb24 pixel = Unsafe.Add(ref firstPixel, (nint)(uint)index);

        redBlock[index] = pixel.R;
        greenBlock[index] = pixel.G;
        blueBlock[index] = pixel.B;

        index++;
    }
}
|||
} |
|||
} |
|||
@ -1,237 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Runtime.CompilerServices; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder |
|||
{ |
|||
/// <summary>
|
|||
/// Provides 8-bit lookup tables for converting from Rgb to YCbCr colorspace.
|
|||
/// Methods to build the tables are based on libjpeg implementation.
|
|||
/// </summary>
|
|||
internal unsafe struct RgbToYCbCrConverterLut |
|||
{ |
|||
/// <summary>
|
|||
/// The red luminance table
|
|||
/// </summary>
|
|||
public fixed int YRTable[256]; |
|||
|
|||
/// <summary>
|
|||
/// The green luminance table
|
|||
/// </summary>
|
|||
public fixed int YGTable[256]; |
|||
|
|||
/// <summary>
|
|||
/// The blue luminance table
|
|||
/// </summary>
|
|||
public fixed int YBTable[256]; |
|||
|
|||
/// <summary>
|
|||
/// The red blue-chrominance table
|
|||
/// </summary>
|
|||
public fixed int CbRTable[256]; |
|||
|
|||
/// <summary>
|
|||
/// The green blue-chrominance table
|
|||
/// </summary>
|
|||
public fixed int CbGTable[256]; |
|||
|
|||
/// <summary>
|
|||
/// The blue blue-chrominance table
|
|||
/// B=>Cb and R=>Cr are the same
|
|||
/// </summary>
|
|||
public fixed int CbBTable[256]; |
|||
|
|||
/// <summary>
|
|||
/// The green red-chrominance table
|
|||
/// </summary>
|
|||
public fixed int CrGTable[256]; |
|||
|
|||
/// <summary>
|
|||
/// The blue red-chrominance table
|
|||
/// </summary>
|
|||
public fixed int CrBTable[256]; |
|||
|
|||
// Speediest right-shift on some machines and gives us enough accuracy at 4 decimal places.
|
|||
private const int ScaleBits = 16; |
|||
|
|||
private const int CBCrOffset = 128 << ScaleBits; |
|||
|
|||
private const int Half = 1 << (ScaleBits - 1); |
|||
|
|||
/// <summary>
/// Builds a lookup table set with all eight 256-entry tables filled in.
/// </summary>
/// <returns>The initialized <see cref="RgbToYCbCrConverterLut"/></returns>
public static RgbToYCbCrConverterLut Create()
{
    // Fixed-point coefficients. Table entries stay scaled up because three of them
    // are added together before the final right shift performs the rounding.
    int yFromR = Fix(0.299F);
    int yFromG = Fix(0.587F);
    int yFromB = Fix(0.114F);
    int cbFromR = -Fix(0.168735892F);
    int cbFromG = -Fix(0.331264108F);
    int halfCoefficient = Fix(0.5F);
    int crFromG = -Fix(0.418687589F);
    int crFromB = -Fix(0.081312411F);

    RgbToYCbCrConverterLut lut = default;

    for (int value = 0; value < 256; value++)
    {
        lut.YRTable[value] = yFromR * value;
        lut.YGTable[value] = yFromG * value;

        // The rounding term for Y is folded into the blue table.
        lut.YBTable[value] = (yFromB * value) + Half;

        lut.CbRTable[value] = cbFromR * value;
        lut.CbGTable[value] = cbFromG * value;

        // Cb and Cr use a rounding fudge factor of 0.5 - epsilon, so the maximum output
        // rounds to 255 rather than 256 and no range limiting is needed.
        // This single table serves both B=>Cb and R=>Cr, which are identical.
        lut.CbBTable[value] = (halfCoefficient * value) + CBCrOffset + Half - 1;

        lut.CrGTable[value] = crFromG * value;
        lut.CrBTable[value] = crFromB * value;
    }

    return lut;
}
|||
|
|||
/// <summary>
/// Looks up the luminance of one pixel; fixed-point form of (0.299F * r) + (0.587F * g) + (0.114F * b).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private float CalculateY(byte r, byte g, byte b)
{
    int scaledSum = this.YRTable[r] + this.YGTable[g] + this.YBTable[b];
    return scaledSum >> ScaleBits;
}
|||
|
|||
/// <summary>
/// Looks up the blue chrominance of one pixel; fixed-point form of
/// 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private float CalculateCb(byte r, byte g, byte b)
{
    int scaledSum = this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b];
    return scaledSum >> ScaleBits;
}
|||
|
|||
/// <summary>
/// Looks up the red chrominance of one pixel; fixed-point form of
/// 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private float CalculateCr(byte r, byte g, byte b)
{
    // The R=>Cr table is identical to the B=>Cb table, so CbBTable is indexed with red here.
    int scaledSum = this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b];
    return scaledSum >> ScaleBits;
}
|||
|
|||
/// <summary>
/// Converts 64 Rgb24 pixels into YCbCr color space with 4:4:4 sampling of luminance and chroma
/// (one Y, Cb and Cr value per pixel).
/// </summary>
/// <param name="rgbSpan">Span of Rgb24 pixel data; the first 64 pixels are read</param>
/// <param name="yBlock">Resulting Y values block</param>
/// <param name="cbBlock">Resulting Cb values block</param>
/// <param name="crBlock">Resulting Cr values block</param>
public void Convert444(Span<Rgb24> rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
{
    ref Rgb24 firstPixel = ref rgbSpan[0];

    int index = 0;
    while (index < Block8x8F.Size)
    {
        Rgb24 pixel = Unsafe.Add(ref firstPixel, index);

        yBlock[index] = this.CalculateY(pixel.R, pixel.G, pixel.B);
        cbBlock[index] = this.CalculateCb(pixel.R, pixel.G, pixel.B);
        crBlock[index] = this.CalculateCr(pixel.R, pixel.G, pixel.B);

        index++;
    }
}
|||
|
|||
/// <summary>
/// Converts a 16x8 run of Rgb24 pixels into YCbCr color space with 4:2:0 subsampling of chroma.
/// </summary>
/// <remarks>
/// One call fills two luminance blocks and one half of each chroma block, so it has to be called
/// twice (with <paramref name="row"/> 0 and 1) to complete the chroma of a 16x16 area.
/// </remarks>
/// <param name="rgbSpan">Span of Rgb24 pixel data, 16 pixels per row</param>
/// <param name="yBlockLeft">First or "left" resulting Y block</param>
/// <param name="yBlockRight">Second or "right" resulting Y block</param>
/// <param name="cbBlock">Resulting Cb values block</param>
/// <param name="crBlock">Resulting Cr values block</param>
/// <param name="row">Row index of the 16x16 block, 0 or 1</param>
public void Convert420(Span<Rgb24> rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row)
{
    DebugGuard.MustBeBetweenOrEqualTo(row, 0, 1, nameof(row));

    ref float leftLuma = ref Unsafe.As<Block8x8F, float>(ref yBlockLeft);
    ref float rightLuma = ref Unsafe.As<Block8x8F, float>(ref yBlockRight);

    // row 0 writes chroma elements 0-31 (upper half), row 1 writes 32-63 (lower half)
    int chromaHalfStart = row * (Block8x8F.Size / 2);
    ref float cbHalf = ref Unsafe.Add(ref Unsafe.As<Block8x8F, float>(ref cbBlock), chromaHalfStart);
    ref float crHalf = ref Unsafe.Add(ref Unsafe.As<Block8x8F, float>(ref crBlock), chromaHalfStart);

    ref Rgb24 firstPixel = ref rgbSpan[0];

    // Each pass consumes two source rows of 16 pixels and yields two luma rows plus one chroma row.
    for (int rowPair = 0; rowPair < 4; rowPair++)
    {
        int lumaOffset = rowPair * 16;
        int chromaOffset = rowPair * 8;
        ref Rgb24 leftPixels = ref Unsafe.Add(ref firstPixel, rowPair * 32);

        // left 8 columns -> left Y block, chroma columns 0-3
        this.ConvertChunk420(
            ref leftPixels,
            ref Unsafe.Add(ref leftLuma, lumaOffset),
            ref Unsafe.Add(ref cbHalf, chromaOffset),
            ref Unsafe.Add(ref crHalf, chromaOffset));

        // right 8 columns -> right Y block, chroma columns 4-7
        this.ConvertChunk420(
            ref Unsafe.Add(ref leftPixels, 8),
            ref Unsafe.Add(ref rightLuma, lumaOffset),
            ref Unsafe.Add(ref cbHalf, chromaOffset + 4),
            ref Unsafe.Add(ref crHalf, chromaOffset + 4));
    }
}
|||
|
|||
/// <summary>
/// Converts an 8x2 pixel chunk: writes 8x2 luminance values and 4 averaged values for each chroma component.
/// </summary>
/// <param name="stride">First pixel of the chunk's top row; the bottom row starts 16 pixels later.</param>
/// <param name="yBlock">First luminance element of the top row; the bottom row is written 8 elements later.</param>
/// <param name="cbBlock">First of the 4 Cb elements to write.</param>
/// <param name="crBlock">First of the 4 Cr elements to write.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void ConvertChunk420(ref Rgb24 stride, ref float yBlock, ref float cbBlock, ref float crBlock)
{
    // Source rows are 16 pixels wide (two 8x8 blocks side by side), hence the +16 to reach the row below.
    // Luminance keeps the original resolution inside an 8-wide block, hence the +8 to reach the row below.
    for (int column = 0, chromaIndex = 0; column < 8; column += 2, chromaIndex++)
    {
        ref float lumaTopLeft = ref Unsafe.Add(ref yBlock, column);

        // top row of the 2x2 cell
        Rgb24 topLeft = Unsafe.Add(ref stride, column);
        Rgb24 topRight = Unsafe.Add(ref stride, column + 1);
        lumaTopLeft = this.CalculateY(topLeft.R, topLeft.G, topLeft.B);
        Unsafe.Add(ref lumaTopLeft, 1) = this.CalculateY(topRight.R, topRight.G, topRight.B);

        // bottom row of the 2x2 cell
        Rgb24 bottomLeft = Unsafe.Add(ref stride, column + 16);
        Rgb24 bottomRight = Unsafe.Add(ref stride, column + 17);
        Unsafe.Add(ref lumaTopLeft, 8) = this.CalculateY(bottomLeft.R, bottomLeft.G, bottomLeft.B);
        Unsafe.Add(ref lumaTopLeft, 9) = this.CalculateY(bottomRight.R, bottomRight.G, bottomRight.B);

        // one chroma sample per 2x2 cell
        Unsafe.Add(ref cbBlock, chromaIndex) = this.CalculateAverageCb(topLeft, topRight, bottomLeft, bottomRight);
        Unsafe.Add(ref crBlock, chromaIndex) = this.CalculateAverageCr(topLeft, topRight, bottomLeft, bottomRight);
    }
}
|||
|
|||
/// <summary>
/// Returns the mean Cb value of four pixels.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private float CalculateAverageCb(Rgb24 px0, Rgb24 px1, Rgb24 px2, Rgb24 px3)
{
    float cb0 = this.CalculateCb(px0.R, px0.G, px0.B);
    float cb1 = this.CalculateCb(px1.R, px1.G, px1.B);
    float cb2 = this.CalculateCb(px2.R, px2.G, px2.B);
    float cb3 = this.CalculateCb(px3.R, px3.G, px3.B);

    return 0.25f * (cb0 + cb1 + cb2 + cb3);
}
|||
|
|||
/// <summary>
/// Returns the mean Cr value of four pixels.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private float CalculateAverageCr(Rgb24 px0, Rgb24 px1, Rgb24 px2, Rgb24 px3)
{
    float cr0 = this.CalculateCr(px0.R, px0.G, px0.B);
    float cr1 = this.CalculateCr(px1.R, px1.G, px1.B);
    float cr2 = this.CalculateCr(px2.R, px2.G, px2.B);
    float cr3 = this.CalculateCr(px3.R, px3.G, px3.B);

    return 0.25f * (cr0 + cr1 + cr2 + cr3);
}
|||
|
|||
/// <summary>
/// Converts a coefficient to fixed point: scales it by 2^ScaleBits and rounds by adding 0.5 before truncation.
/// </summary>
/// <param name="x">The coefficient to convert.</param>
/// <returns>The scaled integer representation.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int Fix(float x)
{
    return (int)((x * (1L << ScaleBits)) + 0.5F);
}
|||
} |
|||
} |
|||
@ -1,259 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Diagnostics; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
#endif
|
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder |
|||
{ |
|||
internal static class RgbToYCbCrConverterVectorized |
|||
{ |
|||
/// <summary>
/// Gets a value indicating whether the vectorized converters can be used:
/// true only when runtime intrinsics are compiled in and AVX2 is available.
/// </summary>
public static bool IsSupported =>
#if SUPPORTS_RUNTIME_INTRINSICS
    Avx2.IsSupported;
#else
    false;
#endif
|||
|
|||
/// <summary>
/// Gets the number of extra bytes an Rgb24 buffer must provide after its pixel data
/// so the vectorized converters can read it safely: 8 when they are supported, otherwise 0.
/// </summary>
public static int AvxCompatibilityPadding
{
    // An 8x8 rgb matrix is stored as 8 sequential strides of 8 pixels: 64 * 3 = 192 bytes (1536 bits).
    // One stride is 3 * 8 = 24 bytes (192 bits), but an Avx register holds 256 bits, so loading
    // a stride also pulls in 64 bits that follow it:
    //
    //   strides 0-6: the extra 64 bits are the beginning of the next stride - harmless
    //   stride 7:    bits 1344 - 1536 plus 64 extra bits -> 1600, past the end of the buffer
    //
    // Without padding that last load would read foreign memory. Appending 8 bytes to the
    // rgb byte buffer keeps it in bounds without any extra code in the converters.
    get
    {
#if SUPPORTS_RUNTIME_INTRINSICS
        return IsSupported ? 8 : 0;
#else
        return 0;
#endif
    }
}
|||
|
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
|
|||
/// <summary>
/// Gets the control mask used with <c>Avx2.PermuteVar8x32</c>, stored as eight little-endian
/// 32-bit indices: 0, 1, 2, 6 for the lower 128-bit lane and 3, 4, 5, 7 for the upper lane.
/// Source bytes 0-11 therefore start the lower lane and source bytes 12-23 start the upper lane.
/// </summary>
internal static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
{
    // lower lane: dwords 0, 1, 2, 6
    0, 0, 0, 0,
    1, 0, 0, 0,
    2, 0, 0, 0,
    6, 0, 0, 0,

    // upper lane: dwords 3, 4, 5, 7
    3, 0, 0, 0,
    4, 0, 0, 0,
    5, 0, 0, 0,
    7, 0, 0, 0
};
|||
|
|||
/// <summary>
/// Gets the byte shuffle mask used with <c>Avx2.Shuffle</c>. Within each 128-bit lane it gathers
/// bytes 0, 3, 6, 9, then 1, 4, 7, 10, then 2, 5, 8, 11 - turning four interleaved RGB pixels into
/// RRRRGGGGBBBB - and the 0xFF selectors fill the remaining four bytes.
/// </summary>
internal static ReadOnlySpan<byte> ExtractRgb => new byte[]
{
    // lower lane
    0, 3, 6, 9,
    1, 4, 7, 10,
    2, 5, 8, 11,
    0xFF, 0xFF, 0xFF, 0xFF,

    // upper lane
    0, 3, 6, 9,
    1, 4, 7, 10,
    2, 5, 8, 11,
    0xFF, 0xFF, 0xFF, 0xFF
};
|||
#endif
|
|||
|
|||
/// <summary>
/// Converts an 8x8 Rgb24 pixel matrix to Y, Cb and Cr matrices with 4:4:4 sampling, one row of 8 pixels per iteration.
/// </summary>
/// <remarks>Total size of rgb span must be 200 bytes: 192 bytes of pixels plus 8 bytes of padding for the last 256-bit load</remarks>
/// <param name="rgbSpan">Span of rgb pixels with size of 64</param>
/// <param name="yBlock">8x8 destination matrix of Luminance(Y) converted data</param>
/// <param name="cbBlock">8x8 destination matrix of Chrominance(Cb) converted data</param>
/// <param name="crBlock">8x8 destination matrix of Chrominance(Cr) converted data</param>
public static void Convert444(ReadOnlySpan<Rgb24> rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
{
    Debug.Assert(IsSupported, "AVX2 is required to run this converter");

#if SUPPORTS_RUNTIME_INTRINSICS
    // conversion coefficients
    Vector256<float> yFromR = Vector256.Create(0.299f);
    Vector256<float> yFromG = Vector256.Create(0.587f);
    Vector256<float> yFromB = Vector256.Create(0.114f);
    Vector256<float> cbFromR = Vector256.Create(-0.168736f);
    Vector256<float> cbFromG = Vector256.Create(-0.331264f);
    Vector256<float> chromaBias = Vector256.Create(128f);
    Vector256<float> crFromG = Vector256.Create(-0.418688f);
    Vector256<float> crFromB = Vector256.Create(-0.081312F);
    Vector256<float> half = Vector256.Create(0.5f);
    Vector256<byte> zero = Vector256<byte>.Zero;

    ref Vector256<byte> source = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));
    ref Vector256<float> yRows = ref yBlock.V0;
    ref Vector256<float> cbRows = ref cbBlock.V0;
    ref Vector256<float> crRows = ref crBlock.V0;

    Vector256<uint> laneMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
    Vector256<byte> channelMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ExtractRgb));

    const int bytesPerRgbStride = 24;
    for (int rowIndex = 0; rowIndex < 8; rowIndex++)
    {
        // Load one row and split its 24 payload bytes across the two 128-bit lanes.
        Vector256<uint> raw = Unsafe.AddByteOffset(ref source, (IntPtr)(bytesPerRgbStride * rowIndex)).AsUInt32();
        Vector256<byte> packed = Avx2.PermuteVar8x32(raw, laneMask).AsByte();

        // Group the channel bytes inside each lane.
        packed = Avx2.Shuffle(packed, channelMask);

        // Widen bytes to 32-bit integers by interleaving with zero twice, then convert to float.
        Vector256<byte> redGreen = Avx2.UnpackLow(packed, zero);
        Vector256<byte> blueOnly = Avx2.UnpackHigh(packed, zero);

        Vector256<float> r = Avx.ConvertToVector256Single(Avx2.UnpackLow(redGreen, zero).AsInt32());
        Vector256<float> g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(redGreen, zero).AsInt32());
        Vector256<float> b = Avx.ConvertToVector256Single(Avx2.UnpackLow(blueOnly, zero).AsInt32());

        // (0.299F * r) + (0.587F * g) + (0.114F * b);
        Vector256<float> luma = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(yFromB, b), yFromG, g), yFromR, r);
        Unsafe.Add(ref yRows, rowIndex) = luma;

        // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
        Vector256<float> cb = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(half, b), cbFromG, g), cbFromR, r);
        Unsafe.Add(ref cbRows, rowIndex) = Avx.Add(chromaBias, cb);

        // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
        Vector256<float> cr = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(crFromB, b), crFromG, g), half, r);
        Unsafe.Add(ref crRows, rowIndex) = Avx.Add(chromaBias, cr);
    }
#endif
}
|||
|
|||
/// <summary>
/// Converts a 16x8 Rgb24 pixel matrix to two 8x8 Y matrices and one half of each chroma matrix (4:2:0 subsampling).
/// </summary>
/// <remarks>
/// The source is consumed as 16 strides of 8 pixels; for source row n the left half is stride 2n and the
/// right half is stride 2n + 1. Every stride is loaded as a full 256-bit vector.
/// </remarks>
/// <param name="rgbSpan">Span of rgb pixels, 16 pixels per row</param>
/// <param name="yBlockLeft">Destination Y matrix for the left 8 columns</param>
/// <param name="yBlockRight">Destination Y matrix for the right 8 columns</param>
/// <param name="cbBlock">Destination Cb matrix; 4 of its 8 rows are written per call</param>
/// <param name="crBlock">Destination Cr matrix; 4 of its 8 rows are written per call</param>
/// <param name="row">Selects the chroma rows to write: rows (row * 4) to (row * 4) + 3</param>
public static void Convert420(ReadOnlySpan<Rgb24> rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row)
{
    Debug.Assert(IsSupported, "AVX2 is required to run this converter");

#if SUPPORTS_RUNTIME_INTRINSICS
    // conversion coefficients
    Vector256<float> yFromR = Vector256.Create(0.299f);
    Vector256<float> yFromG = Vector256.Create(0.587f);
    Vector256<float> yFromB = Vector256.Create(0.114f);
    Vector256<float> cbFromR = Vector256.Create(-0.168736f);
    Vector256<float> cbFromG = Vector256.Create(-0.331264f);
    Vector256<float> chromaBias = Vector256.Create(128f);
    Vector256<float> crFromG = Vector256.Create(-0.418688f);
    Vector256<float> crFromB = Vector256.Create(-0.081312F);
    Vector256<float> half = Vector256.Create(0.5f);
    Vector256<byte> zero = Vector256<byte>.Zero;

    ref Vector256<byte> source = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));

    int firstChromaRow = row * 4;
    ref Vector256<float> cbRows = ref Unsafe.Add(ref Unsafe.As<Block8x8F, Vector256<float>>(ref cbBlock), firstChromaRow);
    ref Vector256<float> crRows = ref Unsafe.Add(ref Unsafe.As<Block8x8F, Vector256<float>>(ref crBlock), firstChromaRow);

    Vector256<uint> laneMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
    Vector256<byte> channelMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ExtractRgb));

    // Per-channel scratch for one 16x2 source area: slots 0/2 hold the left strides, 1/3 the right strides.
    Span<Vector256<float>> redLanes = stackalloc Vector256<float>[4];
    Span<Vector256<float>> greenLanes = stackalloc Vector256<float>[4];
    Span<Vector256<float>> blueLanes = stackalloc Vector256<float>[4];

    const int bytesPerRgbStride = 24;
    for (int rowPair = 0; rowPair < 4; rowPair++)
    {
        int firstStride = rowPair * 4;
        int firstLumaRow = rowPair * 2;

        // 16x2 => 8x1
        // left 8x8 column conversions (slots 0 and 2)
        for (int slot = 0; slot < 4; slot += 2)
        {
            Vector256<uint> raw = Unsafe.AddByteOffset(ref source, (IntPtr)(bytesPerRgbStride * (firstStride + slot))).AsUInt32();
            Vector256<byte> packed = Avx2.Shuffle(Avx2.PermuteVar8x32(raw, laneMask).AsByte(), channelMask);

            Vector256<byte> redGreen = Avx2.UnpackLow(packed, zero);
            Vector256<byte> blueOnly = Avx2.UnpackHigh(packed, zero);

            Vector256<float> r = Avx.ConvertToVector256Single(Avx2.UnpackLow(redGreen, zero).AsInt32());
            Vector256<float> g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(redGreen, zero).AsInt32());
            Vector256<float> b = Avx.ConvertToVector256Single(Avx2.UnpackLow(blueOnly, zero).AsInt32());

            // (0.299F * r) + (0.587F * g) + (0.114F * b);
            Unsafe.Add(ref yBlockLeft.V0, firstLumaRow + (slot >> 1)) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(yFromB, b), yFromG, g), yFromR, r);

            redLanes[slot] = r;
            greenLanes[slot] = g;
            blueLanes[slot] = b;
        }

        // 16x2 => 8x1
        // right 8x8 column conversions (slots 1 and 3)
        for (int slot = 1; slot < 4; slot += 2)
        {
            Vector256<uint> raw = Unsafe.AddByteOffset(ref source, (IntPtr)(bytesPerRgbStride * (firstStride + slot))).AsUInt32();
            Vector256<byte> packed = Avx2.Shuffle(Avx2.PermuteVar8x32(raw, laneMask).AsByte(), channelMask);

            Vector256<byte> redGreen = Avx2.UnpackLow(packed, zero);
            Vector256<byte> blueOnly = Avx2.UnpackHigh(packed, zero);

            Vector256<float> r = Avx.ConvertToVector256Single(Avx2.UnpackLow(redGreen, zero).AsInt32());
            Vector256<float> g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(redGreen, zero).AsInt32());
            Vector256<float> b = Avx.ConvertToVector256Single(Avx2.UnpackLow(blueOnly, zero).AsInt32());

            // (0.299F * r) + (0.587F * g) + (0.114F * b);
            Unsafe.Add(ref yBlockRight.V0, firstLumaRow + (slot >> 1)) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(yFromB, b), yFromG, g), yFromR, r);

            redLanes[slot] = r;
            greenLanes[slot] = g;
            blueLanes[slot] = b;
        }

        // Average every 2x2 cell of the 16x2 area; chroma is computed from the averaged channels.
        Vector256<float> avgR = Scale16x2_8x1(redLanes);
        Vector256<float> avgG = Scale16x2_8x1(greenLanes);
        Vector256<float> avgB = Scale16x2_8x1(blueLanes);

        // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
        Unsafe.Add(ref cbRows, rowPair) = Avx.Add(chromaBias, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(half, avgB), cbFromG, avgG), cbFromR, avgR));

        // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
        Unsafe.Add(ref crRows, rowPair) = Avx.Add(chromaBias, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(crFromB, avgB), crFromG, avgG), half, avgR));
    }
#endif
}
|||
|
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
/// <summary>
/// Downscales a 16x2 matrix to 8x1 by averaging each 2x2 cell.
/// </summary>
/// <param name="v">
/// Input matrix as 4 256bit vectors: elements 0 and 2 are the two rows of the left half,
/// elements 1 and 3 the two rows of the right half.
/// </param>
/// <returns>256bit vector holding the 4 averages of the left half followed by the 4 averages of the right half</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector256<float> Scale16x2_8x1(ReadOnlySpan<Vector256<float>> v)
{
    Debug.Assert(Avx2.IsSupported, "AVX2 is required to run this converter");
    DebugGuard.IsTrue(v.Length == 4, "Input span must consist of 4 elements");

    // Vertical sums: add the two rows of each half.
    Vector256<float> leftRowSums = Avx.Add(v[0], v[2]);
    Vector256<float> rightRowSums = Avx.Add(v[1], v[3]);

    // Horizontal sums of adjacent pairs complete each 2x2 total; a quarter of it is the average.
    Vector256<float> cellSums = Avx.HorizontalAdd(leftRowSums, rightRowSums);
    Vector256<float> cellAverages = Avx.Multiply(cellSums, Vector256.Create(0.25f));

    // HorizontalAdd works per 128-bit lane and leaves left/right results interleaved;
    // swapping the two middle 64-bit parts puts all left results first.
    return Avx2.Permute4x64(cellAverages.AsDouble(), 0b11_01_10_00).AsSingle();
}
|||
#endif
|
|||
} |
|||
} |
|||
@ -1,121 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.Advanced; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
    /// <summary>
    /// Stack-only worker that runs the TPixel -> Rgb24 -> YCbCr conversion chain for a 16x8 pixel area,
    /// writing the results into <see cref="YLeft"/>, <see cref="YRight"/>, <see cref="Cb"/> and <see cref="Cr"/>.
    /// </summary>
    /// <typeparam name="TPixel">The pixel type to work on</typeparam>
    internal ref struct YCbCrForwardConverter420<TPixel>
        where TPixel : unmanaged, IPixel<TPixel>
    {
        /// <summary>
        /// Pixel count consumed by one <see cref="Convert(int, int, ref RowOctet{TPixel}, int)"/> call (16 wide, 8 high).
        /// </summary>
        private const int PixelsPerSample = 16 * 8;

        /// <summary>
        /// Byte size of <see cref="PixelsPerSample"/> pixels once converted to <see cref="Rgb24"/> (3 bytes each).
        /// </summary>
        private const int RgbSpanByteSize = PixelsPerSample * 3;

        /// <summary>
        /// The left Y component
        /// </summary>
        public Block8x8F YLeft;

        /// <summary>
        /// The right Y component
        /// </summary>
        public Block8x8F YRight;

        /// <summary>
        /// The Cb component
        /// </summary>
        public Block8x8F Cb;

        /// <summary>
        /// The Cr component
        /// </summary>
        public Block8x8F Cr;

        /// <summary>
        /// Lookup tables for the non-vectorized conversion; left at default when the vectorized converter is supported.
        /// </summary>
        private RgbToYCbCrConverterLut colorTables;

        /// <summary>
        /// Temporary 16x8 buffer holding the sampled TPixel data
        /// </summary>
        private readonly Span<TPixel> pixelSpan;

        /// <summary>
        /// Temporary buffer holding the sampled data converted to <see cref="Rgb24"/>
        /// </summary>
        private readonly Span<Rgb24> rgbSpan;

        /// <summary>
        /// Size of the frame the samples are taken from
        /// </summary>
        private readonly Size samplingAreaSize;

        /// <summary>
        /// <see cref="Configuration"/> used by the pixel conversion
        /// </summary>
        private readonly Configuration config;

        /// <summary>
        /// Initializes a new instance of the <see cref="YCbCrForwardConverter420{TPixel}"/> struct.
        /// </summary>
        /// <param name="frame">The frame supplying the dimensions and the configuration.</param>
        public YCbCrForwardConverter420(ImageFrame<TPixel> frame)
        {
            // The component blocks start out empty; Convert calls fill them.
            this.YLeft = default;
            this.YRight = default;
            this.Cb = default;
            this.Cr = default;

            // Scratch buffers. The RGB buffer is allocated with extra padding bytes
            // (RgbToYCbCrConverterVectorized.AvxCompatibilityPadding) after the pixel data.
            TPixel[] pixelBuffer = new TPixel[PixelsPerSample];
            byte[] rgbBuffer = new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxCompatibilityPadding];
            this.pixelSpan = pixelBuffer.AsSpan();
            this.rgbSpan = MemoryMarshal.Cast<byte, Rgb24>(rgbBuffer.AsSpan());

            // Frame data.
            this.samplingAreaSize = new Size(frame.Width, frame.Height);
            this.config = frame.GetConfiguration();

            // The lookup tables are only built when the vectorized converter is unavailable.
            this.colorTables = RgbToYCbCrConverterVectorized.IsSupported
                ? default
                : RgbToYCbCrConverterLut.Create();
        }

        /// <summary>
        /// Gets the dimensions of the area sampled by a single convert call.
        /// </summary>
        private static Size SampleSize => new Size(16, 8);

        /// <summary>
        /// Loads a 16x8 area starting at (<paramref name="x"/>, <paramref name="y"/>), converts it to Rgb24
        /// and then to YCbCr, storing the result in the public block fields of this struct.
        /// </summary>
        /// <param name="x">Horizontal start position of the sampled area.</param>
        /// <param name="y">Vertical start position of the sampled area.</param>
        /// <param name="currentRows">The pixel rows to sample from.</param>
        /// <param name="idx">
        /// Forwarded unchanged to the underlying Convert420 implementation.
        /// NOTE(review): presumably selects which half of the chroma blocks is written - confirm against Convert420.
        /// </param>
        public void Convert(int x, int y, ref RowOctet<TPixel> currentRows, int idx)
        {
            var origin = new Point(x, y);
            YCbCrForwardConverter<TPixel>.LoadAndStretchEdges(currentRows, this.pixelSpan, origin, SampleSize, this.samplingAreaSize);

            PixelOperations<TPixel>.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan);

            if (!RgbToYCbCrConverterVectorized.IsSupported)
            {
                this.colorTables.Convert420(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx);
                return;
            }

            RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx);
        }
    }
}
|||
@ -1,122 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.Advanced; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
    /// <summary>
    /// Stack-only worker that runs the TPixel -> Rgb24 -> YCbCr conversion chain for a single 8x8 pixel block,
    /// writing the results into <see cref="Y"/>, <see cref="Cb"/> and <see cref="Cr"/>.
    /// </summary>
    /// <typeparam name="TPixel">The pixel type to work on</typeparam>
    internal ref struct YCbCrForwardConverter444<TPixel>
        where TPixel : unmanaged, IPixel<TPixel>
    {
        /// <summary>
        /// Pixel count consumed by one <see cref="Convert(int, int, ref RowOctet{TPixel})"/> call (8 wide, 8 high).
        /// </summary>
        private const int PixelsPerSample = 8 * 8;

        /// <summary>
        /// Byte size of <see cref="PixelsPerSample"/> pixels once converted to <see cref="Rgb24"/> (3 bytes each).
        /// </summary>
        private const int RgbSpanByteSize = PixelsPerSample * 3;

        /// <summary>
        /// The Y component
        /// </summary>
        public Block8x8F Y;

        /// <summary>
        /// The Cb component
        /// </summary>
        public Block8x8F Cb;

        /// <summary>
        /// The Cr component
        /// </summary>
        public Block8x8F Cr;

        /// <summary>
        /// Lookup tables for the non-vectorized conversion; left at default when the vectorized converter is supported.
        /// </summary>
        private RgbToYCbCrConverterLut colorTables;

        /// <summary>
        /// Temporary 64-pixel buffer holding the unconverted TPixel data
        /// </summary>
        private readonly Span<TPixel> pixelSpan;

        /// <summary>
        /// Temporary buffer holding the sampled data converted to <see cref="Rgb24"/>
        /// </summary>
        private readonly Span<Rgb24> rgbSpan;

        /// <summary>
        /// Size of the frame the samples are taken from
        /// </summary>
        private readonly Size samplingAreaSize;

        /// <summary>
        /// <see cref="Configuration"/> used by the pixel conversion
        /// </summary>
        private readonly Configuration config;

        /// <summary>
        /// Initializes a new instance of the <see cref="YCbCrForwardConverter444{TPixel}"/> struct.
        /// </summary>
        /// <param name="frame">The frame supplying the dimensions and the configuration.</param>
        public YCbCrForwardConverter444(ImageFrame<TPixel> frame)
        {
            // The component blocks start out empty; Convert calls fill them.
            this.Y = default;
            this.Cb = default;
            this.Cr = default;

            // Scratch buffers. The RGB buffer is allocated with extra padding bytes
            // (RgbToYCbCrConverterVectorized.AvxCompatibilityPadding) after the pixel data.
            TPixel[] pixelBuffer = new TPixel[PixelsPerSample];
            byte[] rgbBuffer = new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxCompatibilityPadding];
            this.pixelSpan = pixelBuffer.AsSpan();
            this.rgbSpan = MemoryMarshal.Cast<byte, Rgb24>(rgbBuffer.AsSpan());

            // Frame data.
            this.samplingAreaSize = new Size(frame.Width, frame.Height);
            this.config = frame.GetConfiguration();

            // The lookup tables are only built when the vectorized converter is unavailable.
            this.colorTables = RgbToYCbCrConverterVectorized.IsSupported
                ? default
                : RgbToYCbCrConverterLut.Create();
        }

        /// <summary>
        /// Gets the dimensions of the area sampled by a single convert call.
        /// </summary>
        private static Size SampleSize => new Size(8, 8);

        /// <summary>
        /// Loads the 8x8 area starting at (<paramref name="x"/>, <paramref name="y"/>), converts it to Rgb24
        /// and then to YCbCr, storing the result in <see cref="Y"/>, <see cref="Cb"/> and <see cref="Cr"/>.
        /// </summary>
        /// <param name="x">Horizontal start position of the sampled area.</param>
        /// <param name="y">Vertical start position of the sampled area.</param>
        /// <param name="currentRows">The pixel rows to sample from.</param>
        public void Convert(int x, int y, ref RowOctet<TPixel> currentRows)
        {
            var origin = new Point(x, y);
            YCbCrForwardConverter<TPixel>.LoadAndStretchEdges(currentRows, this.pixelSpan, origin, SampleSize, this.samplingAreaSize);

            PixelOperations<TPixel>.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan);

            if (!RgbToYCbCrConverterVectorized.IsSupported)
            {
                this.colorTables.Convert444(this.rgbSpan, ref this.Y, ref this.Cb, ref this.Cr);
                return;
            }

            RgbToYCbCrConverterVectorized.Convert444(this.rgbSpan, ref this.Y, ref this.Cb, ref this.Cr);
        }
    }
}
|||
@ -1,61 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
    /// <summary>
    /// Helpers shared by the YCbCr forward converters.
    /// </summary>
    /// <typeparam name="TPixel">The pixel type to work on</typeparam>
    internal static class YCbCrForwardConverter<TPixel>
        where TPixel : unmanaged, IPixel<TPixel>
    {
        /// <summary>
        /// Fills <paramref name="dest"/> with a <paramref name="sampleSize"/> sized block of pixels read from
        /// <paramref name="source"/>. When the block reaches past the right or bottom border described by
        /// <paramref name="totalSize"/>, the last available column and row are repeated to fill the remainder.
        /// </summary>
        /// <param name="source">
        /// The rows to read from. Rows are indexed from 0, so <c>source[0]</c> is treated as the row at
        /// <c>start.Y</c>; only the horizontal offset <c>start.X</c> is applied while reading.
        /// </param>
        /// <param name="dest">Row-major destination; must hold at least sampleSize.Width * sampleSize.Height pixels.</param>
        /// <param name="start">Top-left position of the block inside the total area.</param>
        /// <param name="sampleSize">Dimensions of the block to produce.</param>
        /// <param name="totalSize">Dimensions of the total area the block is taken from.</param>
        public static void LoadAndStretchEdges(RowOctet<TPixel> source, Span<TPixel> dest, Point start, Size sampleSize, Size totalSize)
        {
            DebugGuard.MustBeBetweenOrEqualTo(start.X, 0, totalSize.Width - 1, nameof(start.X));
            DebugGuard.MustBeBetweenOrEqualTo(start.Y, 0, totalSize.Height - 1, nameof(start.Y));

            // Everything below writes through unchecked references, so a short destination would corrupt memory.
            DebugGuard.IsTrue(dest.Length >= sampleSize.Width * sampleSize.Height, "Destination span is too small for the requested sample size");

            // Number of columns / rows that actually exist inside the total area.
            int width = Math.Min(sampleSize.Width, totalSize.Width - start.X);
            int height = Math.Min(sampleSize.Height, totalSize.Height - start.Y);

            uint byteWidth = (uint)(width * Unsafe.SizeOf<TPixel>());
            int remainderXCount = sampleSize.Width - width;

            ref byte blockStart = ref MemoryMarshal.GetReference(MemoryMarshal.Cast<TPixel, byte>(dest));
            int rowSizeInBytes = sampleSize.Width * Unsafe.SizeOf<TPixel>();

            for (int y = 0; y < height; y++)
            {
                Span<TPixel> row = source[y];

                ref byte s = ref Unsafe.As<TPixel, byte>(ref row[start.X]);
                ref byte d = ref Unsafe.Add(ref blockStart, y * rowSizeInBytes);

                // Pixel rows carry no alignment guarantee (e.g. 3-byte pixel types at an arbitrary X offset),
                // so use the copy variant that does not assume aligned addresses.
                Unsafe.CopyBlockUnaligned(ref d, ref s, byteWidth);

                // Repeat the last copied pixel across the columns that fall outside the total area.
                ref TPixel last = ref Unsafe.Add(ref Unsafe.As<byte, TPixel>(ref d), width - 1);

                for (int x = 1; x <= remainderXCount; x++)
                {
                    Unsafe.Add(ref last, x) = last;
                }
            }

            int remainderYCount = sampleSize.Height - height;

            if (remainderYCount == 0)
            {
                return;
            }

            // Repeat the last filled row across the rows that fall outside the total area.
            ref byte lastRowStart = ref Unsafe.Add(ref blockStart, (height - 1) * rowSizeInBytes);

            for (int y = 1; y <= remainderYCount; y++)
            {
                ref byte remStart = ref Unsafe.Add(ref lastRowStart, rowSizeInBytes * y);
                Unsafe.CopyBlockUnaligned(ref remStart, ref lastRowStart, (uint)rowSizeInBytes);
            }
        }
    }
}
|||
@ -1,272 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
#endif
|
|||
using SixLabors.ImageSharp.ColorSpaces; |
|||
using SixLabors.ImageSharp.Formats.Jpeg.Components; |
|||
using SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder; |
|||
using SixLabors.ImageSharp.PixelFormats; |
|||
using SixLabors.ImageSharp.Tests.Colorspaces.Conversion; |
|||
using Xunit; |
|||
using Xunit.Abstractions; |
|||
|
|||
// ReSharper disable InconsistentNaming
|
|||
namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
    /// <summary>
    /// Compares the lookup-table based and the vectorized RGB to YCbCr converters
    /// against a scalar floating point reference implementation.
    /// </summary>
    public class RgbToYCbCrConverterTests
    {
        /// <summary>
        /// Fixed seed for the generated pixel data so that a failing run can be reproduced.
        /// </summary>
        private const int TestDataSeed = 42;

        public RgbToYCbCrConverterTests(ITestOutputHelper output)
        {
            this.Output = output;
        }

        private ITestOutputHelper Output { get; }

        [Fact]
        public void TestConverterLut444()
        {
            int dataSize = 8 * 8;
            Rgb24[] data = CreateTestData(dataSize);
            var target = RgbToYCbCrConverterLut.Create();

            Block8x8F y = default;
            Block8x8F cb = default;
            Block8x8F cr = default;

            target.Convert444(data.AsSpan(), ref y, ref cb, ref cr);

            Verify444(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(1F));
        }

        [Fact]
        public void TestConverterVectorized444()
        {
            if (!RgbToYCbCrConverterVectorized.IsSupported)
            {
                this.Output.WriteLine("No AVX and/or FMA present, skipping test!");
                return;
            }

            int dataSize = 8 * 8;
            Rgb24[] data = CreateTestData(dataSize);

            Block8x8F y = default;
            Block8x8F cb = default;
            Block8x8F cr = default;

            RgbToYCbCrConverterVectorized.Convert444(data.AsSpan(), ref y, ref cb, ref cr);

            Verify444(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(0.0001F));
        }

        [Fact]
        public void TestConverterLut420()
        {
            int dataSize = 16 * 16;
            Span<Rgb24> data = CreateTestData(dataSize).AsSpan();
            var target = RgbToYCbCrConverterLut.Create();

            var yBlocks = new Block8x8F[4];
            var cb = default(Block8x8F);
            var cr = default(Block8x8F);

            // The 16x16 input is converted as two 16x8 halves: index 0 for the top, index 1 for the bottom.
            target.Convert420(data, ref yBlocks[0], ref yBlocks[1], ref cb, ref cr, 0);
            target.Convert420(data.Slice(16 * 8), ref yBlocks[2], ref yBlocks[3], ref cb, ref cr, 1);

            Verify420(data, yBlocks, ref cb, ref cr, new ApproximateFloatComparer(1F));
        }

        [Fact]
        public void TestConverterVectorized420()
        {
            if (!RgbToYCbCrConverterVectorized.IsSupported)
            {
                this.Output.WriteLine("No AVX and/or FMA present, skipping test!");
                return;
            }

            int dataSize = 16 * 16;
            Span<Rgb24> data = CreateTestData(dataSize).AsSpan();

            var yBlocks = new Block8x8F[4];
            var cb = default(Block8x8F);
            var cr = default(Block8x8F);

            // The 16x16 input is converted as two 16x8 halves: index 0 for the top, index 1 for the bottom.
            RgbToYCbCrConverterVectorized.Convert420(data, ref yBlocks[0], ref yBlocks[1], ref cb, ref cr, 0);
            RgbToYCbCrConverterVectorized.Convert420(data.Slice(16 * 8), ref yBlocks[2], ref yBlocks[3], ref cb, ref cr, 1);

            Verify420(data, yBlocks, ref cb, ref cr, new ApproximateFloatComparer(1F));
        }

#if SUPPORTS_RUNTIME_INTRINSICS
        [Theory]
        [InlineData(1)]
        [InlineData(2)]
        [InlineData(3)]
        public void Scale16x2_8x1(int seed)
        {
            if (!Avx2.IsSupported)
            {
                // Report the skip like the other hardware dependent tests do instead of passing silently.
                this.Output.WriteLine("No AVX2 present, skipping test!");
                return;
            }

            Span<float> data = new Random(seed).GenerateRandomFloatArray(Vector256<float>.Count * 4, -1000, 1000);

            // Act:
            Vector256<float> resultVector = RgbToYCbCrConverterVectorized.Scale16x2_8x1(MemoryMarshal.Cast<float, Vector256<float>>(data));
            ref float result = ref Unsafe.As<Vector256<float>, float>(ref resultVector);

            // Assert:
            // Comparison epsilon is tricky but 10^(-4) is good enough (?)
            var comparer = new ApproximateFloatComparer(0.0001f);
            for (int i = 0; i < Vector256<float>.Count; i++)
            {
                float actual = Unsafe.Add(ref result, i);
                float expected = CalculateAverage16x2_8x1(data, i);

                Assert.True(comparer.Equals(actual, expected), $"Pos {i}, Expected: {expected}, Actual: {actual}");
            }

            // Scalar reference: the data is read as two rows of 16 floats and
            // result 'index' is the mean of the 2x2 cell starting at column index * 2.
            static float CalculateAverage16x2_8x1(Span<float> data, int index)
            {
                int upIdx = index * 2;
                int lowIdx = (index + 8) * 2;
                return 0.25f * (data[upIdx] + data[upIdx + 1] + data[lowIdx] + data[lowIdx + 1]);
            }
        }
#endif

        /// <summary>
        /// Converts <paramref name="data"/> with the scalar reference and asserts that every
        /// position of the given result blocks matches within the comparer's tolerance.
        /// </summary>
        private static void Verify444(
            ReadOnlySpan<Rgb24> data,
            ref Block8x8F yResult,
            ref Block8x8F cbResult,
            ref Block8x8F crResult,
            ApproximateColorSpaceComparer comparer)
        {
            Block8x8F y = default;
            Block8x8F cb = default;
            Block8x8F cr = default;

            RgbToYCbCr(data, ref y, ref cb, ref cr);

            for (int i = 0; i < Block8x8F.Size; i++)
            {
                Assert.True(comparer.Equals(new YCbCr(y[i], cb[i], cr[i]), new YCbCr(yResult[i], cbResult[i], crResult[i])), $"Pos {i}, Expected {y[i]} == {yResult[i]}, {cb[i]} == {cbResult[i]}, {cr[i]} == {crResult[i]}");
            }
        }

        /// <summary>
        /// Verifies a 4:2:0 conversion of a 16x16 pixel area: the four luminance blocks are compared
        /// one by one and the chroma blocks are compared against the 2x2 downscaled reference chroma.
        /// </summary>
        private static void Verify420(
            ReadOnlySpan<Rgb24> data,
            Block8x8F[] yResult,
            ref Block8x8F cbResult,
            ref Block8x8F crResult,
            ApproximateFloatComparer comparer)
        {
            var trueBlock = default(Block8x8F);
            var cbTrue = new Block8x8F[4];
            var crTrue = new Block8x8F[4];

            Span<Rgb24> tempData = new Rgb24[8 * 8].AsSpan();

            // top left
            Copy8x8(data, tempData);
            RgbToYCbCr(tempData, ref trueBlock, ref cbTrue[0], ref crTrue[0]);
            VerifyBlock(ref yResult[0], ref trueBlock, comparer);

            // top right
            Copy8x8(data.Slice(8), tempData);
            RgbToYCbCr(tempData, ref trueBlock, ref cbTrue[1], ref crTrue[1]);
            VerifyBlock(ref yResult[1], ref trueBlock, comparer);

            // bottom left
            Copy8x8(data.Slice(8 * 16), tempData);
            RgbToYCbCr(tempData, ref trueBlock, ref cbTrue[2], ref crTrue[2]);
            VerifyBlock(ref yResult[2], ref trueBlock, comparer);

            // bottom right
            Copy8x8(data.Slice((8 * 16) + 8), tempData);
            RgbToYCbCr(tempData, ref trueBlock, ref cbTrue[3], ref crTrue[3]);
            VerifyBlock(ref yResult[3], ref trueBlock, comparer);

            // verify Cb
            Scale16X16To8X8(ref trueBlock, cbTrue);
            VerifyBlock(ref cbResult, ref trueBlock, comparer);

            // verify Cr
            Scale16X16To8X8(ref trueBlock, crTrue);
            VerifyBlock(ref crResult, ref trueBlock, comparer);

            // extracts an 8x8 block from a memory region with a row stride of 16 pixels
            static void Copy8x8(ReadOnlySpan<Rgb24> source, Span<Rgb24> dest)
            {
                for (int i = 0; i < 8; i++)
                {
                    source.Slice(i * 16, 8).CopyTo(dest.Slice(i * 8));
                }
            }

            // scales 16x16 to 8x8, used in chroma subsampling tests
            static void Scale16X16To8X8(ref Block8x8F dest, ReadOnlySpan<Block8x8F> source)
            {
                for (int i = 0; i < 4; i++)
                {
                    // Quadrant offset inside the 8x8 destination: bit 1 of i selects the lower half (+32),
                    // bit 0 selects the right half (+4).
                    int dstOff = ((i & 2) << 4) | ((i & 1) << 2);
                    Block8x8F iSource = source[i];

                    for (int y = 0; y < 4; y++)
                    {
                        for (int x = 0; x < 4; x++)
                        {
                            int j = (16 * y) + (2 * x);
                            float sum = iSource[j] + iSource[j + 1] + iSource[j + 8] + iSource[j + 9];

                            // NOTE(review): the '+ 2' shifts the average by 0.5; presumably it mirrors the
                            // rounding of an integer based implementation - confirm. The callers compare with
                            // a tolerance of 1, which absorbs the offset.
                            dest[(8 * y) + x + dstOff] = (sum + 2) * .25F;
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Scalar floating point reference conversion from RGB to YCbCr.
        /// </summary>
        private static void RgbToYCbCr(ReadOnlySpan<Rgb24> data, ref Block8x8F y, ref Block8x8F cb, ref Block8x8F cr)
        {
            for (int i = 0; i < data.Length; i++)
            {
                int r = data[i].R;
                int g = data[i].G;
                int b = data[i].B;

                y[i] = (0.299F * r) + (0.587F * g) + (0.114F * b);
                cb[i] = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b));
                cr[i] = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b));
            }
        }

        /// <summary>
        /// Asserts that two blocks match at every position within the comparer's tolerance.
        /// </summary>
        private static void VerifyBlock(ref Block8x8F res, ref Block8x8F target, ApproximateFloatComparer comparer)
        {
            for (int i = 0; i < Block8x8F.Size; i++)
            {
                Assert.True(comparer.Equals(res[i], target[i]), $"Pos {i}, Expected: {target[i]}, Got: {res[i]}");
            }
        }

        /// <summary>
        /// Creates <paramref name="size"/> pseudo-random pixels. The generator is seeded,
        /// so every run of the test suite sees the same data.
        /// </summary>
        /// <param name="size">Number of pixels to create.</param>
        /// <param name="seed">Seed of the pseudo-random generator.</param>
        private static Rgb24[] CreateTestData(int size, int seed = TestDataSeed)
        {
            var data = new Rgb24[size];
            var r = new Random(seed);

            var random = new byte[3];
            for (int i = 0; i < data.Length; i++)
            {
                r.NextBytes(random);
                data[i] = new Rgb24(random[0], random[1], random[2]);
            }

            return data;
        }
    }
}
|||
Loading…
Reference in new issue