diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs index 7c21d62dd..cd0583361 100644 --- a/src/ImageSharp/Color/Color.cs +++ b/src/ImageSharp/Color/Color.cs @@ -270,8 +270,15 @@ namespace SixLabors.ImageSharp return pixel; } + if (this.boxedHighPrecisionPixel is null) + { + pixel = default; + pixel.FromRgba64(this.data); + return pixel; + } + pixel = default; - pixel.FromRgba64(this.data); + pixel.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4()); return pixel; } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8.Intrinsic.cs new file mode 100644 index 000000000..002d382dc --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8.Intrinsic.cs @@ -0,0 +1,39 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components +{ + internal unsafe partial struct Block8x8 + { + [FieldOffset(0)] + public Vector128 V0; + [FieldOffset(16)] + public Vector128 V1; + [FieldOffset(32)] + public Vector128 V2; + [FieldOffset(48)] + public Vector128 V3; + [FieldOffset(64)] + public Vector128 V4; + [FieldOffset(80)] + public Vector128 V5; + [FieldOffset(96)] + public Vector128 V6; + [FieldOffset(112)] + public Vector128 V7; + + [FieldOffset(0)] + public Vector256 V01; + [FieldOffset(32)] + public Vector256 V23; + [FieldOffset(64)] + public Vector256 V45; + [FieldOffset(96)] + public Vector256 V67; + } +} +#endif diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs index 27bb2fc3c..4b03f9f7b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// // ReSharper disable once InconsistentNaming [StructLayout(LayoutKind.Explicit)] - internal unsafe struct Block8x8 : IEquatable + internal unsafe partial struct Block8x8 { /// /// A number of scalar coefficients in a @@ -36,34 +36,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components private fixed short data[Size]; #pragma warning restore IDE0051 -#if SUPPORTS_RUNTIME_INTRINSICS - [FieldOffset(0)] - public Vector128 V0; - [FieldOffset(16)] - public Vector128 V1; - [FieldOffset(32)] - public Vector128 V2; - [FieldOffset(48)] - public Vector128 V3; - [FieldOffset(64)] - public Vector128 V4; - [FieldOffset(80)] - public Vector128 V5; - [FieldOffset(96)] - public Vector128 V6; - [FieldOffset(112)] - public Vector128 V7; - - [FieldOffset(0)] - public Vector256 V01; - [FieldOffset(32)] - public Vector256 V23; - [FieldOffset(64)] - public Vector256 V45; - [FieldOffset(96)] - public Vector256 V67; -#endif - /// /// Gets or sets a value at the given index /// @@ -102,74 +74,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components set => this[(y * 8) + x] = value; } - public static bool operator ==(Block8x8 left, Block8x8 right) => left.Equals(right); - - public static bool operator !=(Block8x8 left, Block8x8 right) => !left.Equals(right); - - /// - /// Multiply all elements by a given - /// - public static Block8x8 operator *(Block8x8 block, int value) - { - Block8x8 result = block; - for (int i = 0; i < Size; i++) - { - int val = result[i]; - val *= value; - result[i] = (short)val; - } - - return result; - } - - /// - /// Divide all elements by a given - /// - public static Block8x8 operator /(Block8x8 block, int value) - { - Block8x8 result = block; - for (int i = 0; i < Size; i++) - { - int val = result[i]; - val /= value; - result[i] = (short)val; - } - - return result; - } - - /// - /// Add an to all elements - /// - public static Block8x8 operator +(Block8x8 block, int value) - { - Block8x8 result = block; - for (int i = 0; i < Size; i++) - { - int val = result[i]; - val += value; - result[i] = (short)val; - } - - return result; - } - - /// - /// Subtract an from all elements - /// - public static Block8x8 operator -(Block8x8 block, int value) - { - Block8x8 result = block; - for (int i = 0; i < Size; i++) - { - int val = result[i]; - val -= value; - result[i] = (short)val; - } - - return result; - } - public static Block8x8 Load(Span data) { Unsafe.SkipInit(out Block8x8 result); @@ -260,26 +164,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components return sb.ToString(); } - /// - public bool Equals(Block8x8 other) - { - for (int i = 0; i < Size; i++) - { - if (this[i] != other[i]) - { - return false; - } - } - - return true; - } - - /// - public override bool Equals(object obj) => obj is Block8x8 other && this.Equals(other); - - /// - public override int GetHashCode() => (this[0] * 31) + this[1]; - /// /// Returns index of the last non-zero element in given matrix. /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 02f5a1324..f25286447 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -2,7 +2,6 @@ // Licensed under the Apache License, Version 2.0. using System; -using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -98,58 +97,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components set => this[(y * 8) + x] = value; } - public static Block8x8F operator *(Block8x8F block, float value) - { - Block8x8F result = block; - for (int i = 0; i < Size; i++) - { - float val = result[i]; - val *= value; - result[i] = val; - } - - return result; - } - - public static Block8x8F operator /(Block8x8F block, float value) - { - Block8x8F result = block; - for (int i = 0; i < Size; i++) - { - float val = result[i]; - val /= value; - result[i] = val; - } - - return result; - } - - public static Block8x8F operator +(Block8x8F block, float value) - { - Block8x8F result = block; - for (int i = 0; i < Size; i++) - { - float val = result[i]; - val += value; - result[i] = val; - } - - return result; - } - - public static Block8x8F operator -(Block8x8F block, float value) - { - Block8x8F result = block; - for (int i = 0; i < Size; i++) - { - float val = result[i]; - val -= value; - result[i] = val; - } - - return result; - } - public static Block8x8F Load(Span data) { Block8x8F result = default; @@ -157,13 +104,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components return result; } - public static Block8x8F Load(Span data) - { - Block8x8F result = default; - result.LoadFrom(data); - return result; - } - /// /// Load raw 32bit floating point data from source. /// @@ -177,15 +117,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Unsafe.CopyBlock(ref d, ref s, Size * sizeof(float)); } - /// - /// Load raw 32bit floating point data from source. - /// - /// Block pointer - /// Source - [MethodImpl(InliningOptions.ShortMethod)] - public static unsafe void LoadFrom(Block8x8F* blockPtr, Span source) - => blockPtr->LoadFrom(source); - /// /// Load raw 32bit floating point data from source /// @@ -202,44 +133,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } } - /// - /// Copy raw 32bit floating point data to dest, - /// - /// Destination - [MethodImpl(InliningOptions.ShortMethod)] - public void ScaledCopyTo(Span dest) - { - ref byte d = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); - ref byte s = ref Unsafe.As(ref this); - - Unsafe.CopyBlock(ref d, ref s, Size * sizeof(float)); - } - - /// - /// Convert scalars to byte-s and copy to dest, - /// - /// Pointer to block - /// Destination - [MethodImpl(InliningOptions.ShortMethod)] - public static unsafe void ScaledCopyTo(Block8x8F* blockPtr, Span dest) - { - float* fPtr = (float*)blockPtr; - for (int i = 0; i < Size; i++) - { - dest[i] = (byte)*fPtr; - fPtr++; - } - } - - /// - /// Copy raw 32bit floating point data to dest. - /// - /// The block pointer. - /// The destination. - [MethodImpl(InliningOptions.ShortMethod)] - public static unsafe void ScaledCopyTo(Block8x8F* blockPtr, Span dest) - => blockPtr->ScaledCopyTo(dest); - /// /// Copy raw 32bit floating point data to dest /// @@ -253,22 +146,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } } - /// - /// Copy raw 32bit floating point data to dest - /// - /// Destination - public unsafe void ScaledCopyTo(Span dest) - { - fixed (Vector4* ptr = &this.V0L) - { - var fp = (float*)ptr; - for (int i = 0; i < Size; i++) - { - dest[i] = (int)fp[i]; - } - } - } - public float[] ToArray() { float[] result = new float[Size]; diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index 535514c88..79eedf2f7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -197,6 +197,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// public readonly Span Component3; + /// + /// Initializes a new instance of the struct. + /// + /// The 1-4 sized list of component post processors. + /// The row to convert + public ComponentValues(IReadOnlyList componentProcessors, int row) + { + this.ComponentCount = componentProcessors.Count; + + this.Component0 = componentProcessors[0].GetColorBufferRowSpan(row); + + // In case of grayscale, Component1 and Component2 point to Component0 memory area + this.Component1 = this.ComponentCount > 1 ? componentProcessors[1].GetColorBufferRowSpan(row) : this.Component0; + this.Component2 = this.ComponentCount > 2 ? componentProcessors[2].GetColorBufferRowSpan(row) : this.Component0; + this.Component3 = this.ComponentCount > 3 ? componentProcessors[3].GetColorBufferRowSpan(row) : Span.Empty; + } + /// /// Initializes a new instance of the struct. /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs index 0b80acc5d..33815e539 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs @@ -5,7 +5,6 @@ using System; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder { - /// /// /// Represents decompressed, unprocessed jpeg data with spectral space -s. /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs deleted file mode 100644 index 15f212b40..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Memory; - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder -{ - /// - /// Encapsulates the implementation of processing "raw" jpeg buffers into Jpeg image channels. - /// - [StructLayout(LayoutKind.Sequential)] - internal struct JpegBlockPostProcessor - { - /// - /// Source block - /// - public Block8x8F SourceBlock; - - /// - /// The quantization table as . - /// - public Block8x8F DequantiazationTable; - - /// - /// Defines the horizontal and vertical scale we need to apply to the 8x8 sized block. - /// - private Size subSamplingDivisors; - - /// - /// Initializes a new instance of the struct. - /// - /// The raw jpeg data. - /// The raw component. - public JpegBlockPostProcessor(IRawJpegData decoder, IJpegComponent component) - { - int qtIndex = component.QuantizationTableIndex; - this.DequantiazationTable = decoder.QuantizationTables[qtIndex]; - this.subSamplingDivisors = component.SubSamplingDivisors; - - this.SourceBlock = default; - } - - /// - /// Processes 'sourceBlock' producing Jpeg color channel values from spectral values: - /// - Dequantize - /// - Applying IDCT - /// - Level shift by +maximumValue/2, clip to [0, maximumValue] - /// - Copy the resulting color values into 'destArea' scaling up the block by amount defined in . - /// - /// The source block. - /// Reference to the origin of the destination pixel area. - /// The width of the destination pixel buffer. - /// The maximum value derived from the bitdepth. - public void ProcessBlockColorsInto( - ref Block8x8 sourceBlock, - ref float destAreaOrigin, - int destAreaStride, - float maximumValue) - { - ref Block8x8F block = ref this.SourceBlock; - block.LoadFrom(ref sourceBlock); - - // Dequantize: - block.MultiplyInPlace(ref this.DequantiazationTable); - - FastFloatingPointDCT.TransformIDCT(ref block); - - // To conform better to libjpeg we actually NEED TO loose precision here. - // This is because they store blocks as Int16 between all the operations. - // To be "more accurate", we need to emulate this by rounding! - block.NormalizeColorsAndRoundInPlace(maximumValue); - - block.ScaledCopyTo( - ref destAreaOrigin, - destAreaStride, - this.subSamplingDivisors.Width, - this.subSamplingDivisors.Height); - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegComponentPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegComponentPostProcessor.cs index 527d2c030..c3bf1cbdd 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegComponentPostProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegComponentPostProcessor.cs @@ -11,11 +11,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder /// internal class JpegComponentPostProcessor : IDisposable { - /// - /// Points to the current row in . - /// - private int currentComponentRowInBlocks; - /// /// The size of the area in corresponding to one 8x8 Jpeg block /// @@ -26,6 +21,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder /// private readonly JpegFrame frame; + /// + /// Gets the maximal number of block rows being processed in one step. + /// + private readonly int blockRowsPerStep; + + /// + /// Gets the component containing decoding meta information. + /// + private readonly IJpegComponent component; + + /// + /// Gets the instance containing decoding meta information. + /// + private readonly IRawJpegData rawJpeg; + /// /// Initializes a new instance of the class. /// @@ -33,98 +43,87 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder { this.frame = frame; - this.Component = component; - this.RawJpeg = rawJpeg; - this.blockAreaSize = this.Component.SubSamplingDivisors * 8; + this.component = component; + this.rawJpeg = rawJpeg; + this.blockAreaSize = this.component.SubSamplingDivisors * 8; this.ColorBuffer = memoryAllocator.Allocate2DOveraligned( postProcessorBufferSize.Width, postProcessorBufferSize.Height, this.blockAreaSize.Height); - this.BlockRowsPerStep = postProcessorBufferSize.Height / 8 / this.Component.SubSamplingDivisors.Height; + this.blockRowsPerStep = postProcessorBufferSize.Height / 8 / this.component.SubSamplingDivisors.Height; } - public IRawJpegData RawJpeg { get; } - - /// - /// Gets the - /// - public IJpegComponent Component { get; } - /// /// Gets the temporary working buffer of color values. /// public Buffer2D ColorBuffer { get; } - /// - /// Gets - /// - public Size SizeInBlocks => this.Component.SizeInBlocks; - - /// - /// Gets the maximal number of block rows being processed in one step. - /// - public int BlockRowsPerStep { get; } - /// public void Dispose() => this.ColorBuffer.Dispose(); /// - /// Invoke for block rows, copy the result into . + /// Convert raw spectral DCT data to color data and copy it to the color buffer . /// - public void CopyBlocksToColorBuffer(int step) + public void CopyBlocksToColorBuffer(int spectralStep) { - Buffer2D spectralBuffer = this.Component.SpectralBlocks; - - var blockPp = new JpegBlockPostProcessor(this.RawJpeg, this.Component); + Buffer2D spectralBuffer = this.component.SpectralBlocks; float maximumValue = this.frame.MaxColorChannelValue; int destAreaStride = this.ColorBuffer.Width; - int yBlockStart = step * this.BlockRowsPerStep; + int yBlockStart = spectralStep * this.blockRowsPerStep; - for (int y = 0; y < this.BlockRowsPerStep; y++) - { - int yBlock = yBlockStart + y; + Size subSamplingDivisors = this.component.SubSamplingDivisors; - if (yBlock >= spectralBuffer.Height) - { - break; - } + Block8x8F dequantTable = this.rawJpeg.QuantizationTables[this.component.QuantizationTableIndex]; + Block8x8F workspaceBlock = default; + for (int y = 0; y < this.blockRowsPerStep; y++) + { int yBuffer = y * this.blockAreaSize.Height; Span colorBufferRow = this.ColorBuffer.DangerousGetRowSpan(yBuffer); - Span blockRow = spectralBuffer.DangerousGetRowSpan(yBlock); + Span blockRow = spectralBuffer.DangerousGetRowSpan(yBlockStart + y); - // see: https://github.com/SixLabors/ImageSharp/issues/824 - int widthInBlocks = Math.Min(spectralBuffer.Width, this.SizeInBlocks.Width); - - for (int xBlock = 0; xBlock < widthInBlocks; xBlock++) + for (int xBlock = 0; xBlock < spectralBuffer.Width; xBlock++) { - ref Block8x8 block = ref blockRow[xBlock]; - int xBuffer = xBlock * this.blockAreaSize.Width; - ref float destAreaOrigin = ref colorBufferRow[xBuffer]; - - blockPp.ProcessBlockColorsInto(ref block, ref destAreaOrigin, destAreaStride, maximumValue); + // Integer to float + workspaceBlock.LoadFrom(ref blockRow[xBlock]); + + // Dequantize + workspaceBlock.MultiplyInPlace(ref dequantTable); + + // Convert from spectral to color + FastFloatingPointDCT.TransformIDCT(ref workspaceBlock); + + // To conform better to libjpeg we actually NEED TO loose precision here. + // This is because they store blocks as Int16 between all the operations. + // To be "more accurate", we need to emulate this by rounding! + workspaceBlock.NormalizeColorsAndRoundInPlace(maximumValue); + + // Write to color buffer acording to sampling factors + int xColorBufferStart = xBlock * this.blockAreaSize.Width; + workspaceBlock.ScaledCopyTo( + ref colorBufferRow[xColorBufferStart], + destAreaStride, + subSamplingDivisors.Width, + subSamplingDivisors.Height); } } } public void ClearSpectralBuffers() { - Buffer2D spectralBlocks = this.Component.SpectralBlocks; + Buffer2D spectralBlocks = this.component.SpectralBlocks; for (int i = 0; i < spectralBlocks.Height; i++) { spectralBlocks.DangerousGetRowSpan(i).Clear(); } } - public void CopyBlocksToColorBuffer() - { - this.CopyBlocksToColorBuffer(this.currentComponentRowInBlocks); - this.currentComponentRowInBlocks += this.BlockRowsPerStep; - } + public Span GetColorBufferRowSpan(int row) => + this.ColorBuffer.DangerousGetRowSpan(row); } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs index 4e74f6226..1eb74ff80 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs @@ -6,11 +6,8 @@ using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder { /// - /// Converter used to convert jpeg spectral data. + /// Converter used to convert jpeg spectral data to color pixels. /// - /// - /// This is tightly coupled with and . - /// internal abstract class SpectralConverter { /// @@ -30,12 +27,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder public abstract void InjectFrameData(JpegFrame frame, IRawJpegData jpegData); /// - /// Called once per spectral stride for each component in . - /// This is called only for baseline interleaved jpegs. + /// Converts single spectral jpeg stride to color stride in baseline + /// decoding mode. /// /// + /// Called once per decoded spectral stride in + /// only for baseline interleaved jpeg images. /// Spectral 'stride' doesn't particularly mean 'single stride'. - /// Actual stride height depends on the subsampling factor of the given component. + /// Actual stride height depends on the subsampling factor of the given image. /// public abstract void ConvertStrideBaseline(); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs index 0c551bc36..5edcf565c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs @@ -4,7 +4,6 @@ using System; using System.Buffers; using System.Linq; -using System.Numerics; using System.Threading; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; using SixLabors.ImageSharp.Memory; @@ -12,35 +11,78 @@ using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder { + /// + /// + /// Color decoding scheme: + /// + /// + /// 1. Decode spectral data to Jpeg color space + /// 2. Convert from Jpeg color space to RGB + /// 3. Convert from RGB to target pixel space + /// + /// + /// internal class SpectralConverter : SpectralConverter, IDisposable where TPixel : unmanaged, IPixel { + /// + /// instance associated with current + /// decoding routine. + /// private readonly Configuration configuration; - private readonly CancellationToken cancellationToken; - + /// + /// Jpeg component converters from decompressed spectral to color data. + /// private JpegComponentPostProcessor[] componentProcessors; + /// + /// Color converter from jpeg color space to target pixel color space. + /// private JpegColorConverter colorConverter; - // private IMemoryOwner rgbaBuffer; + /// + /// Intermediate buffer of RGB components used in color conversion. + /// private IMemoryOwner rgbBuffer; + /// + /// Proxy buffer used in packing from RGB to target TPixel pixels. + /// private IMemoryOwner paddedProxyPixelRow; + /// + /// Resulting 2D pixel buffer. + /// private Buffer2D pixelBuffer; + /// + /// How many pixel rows are processed in one 'stride'. + /// private int pixelRowsPerStep; + /// + /// How many pixel rows were processed. + /// private int pixelRowCounter; - public SpectralConverter(Configuration configuration, CancellationToken cancellationToken) - { + /// + /// Initializes a new instance of the class. + /// + /// The configuration. + public SpectralConverter(Configuration configuration) => this.configuration = configuration; - this.cancellationToken = cancellationToken; - } - public Buffer2D GetPixelBuffer() + /// + /// Gets converted pixel buffer. + /// + /// + /// For non-baseline interleaved jpeg this method does a 'lazy' spectral + /// conversion from spectral to color. + /// + /// Cancellation token. + /// Pixel buffer. + public Buffer2D GetPixelBuffer(CancellationToken cancellationToken) { if (!this.Converted) { @@ -48,7 +90,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder for (int step = 0; step < steps; step++) { - this.cancellationToken.ThrowIfCancellationRequested(); + cancellationToken.ThrowIfCancellationRequested(); this.ConvertStride(step); } } @@ -102,30 +144,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder } } - /// - public void Dispose() - { - if (this.componentProcessors != null) - { - foreach (JpegComponentPostProcessor cpp in this.componentProcessors) - { - cpp.Dispose(); - } - } - - this.rgbBuffer?.Dispose(); - this.paddedProxyPixelRow?.Dispose(); - } - + /// + /// Converts single spectral jpeg stride to color stride. + /// + /// Spectral stride index. private void ConvertStride(int spectralStep) { int maxY = Math.Min(this.pixelBuffer.Height, this.pixelRowCounter + this.pixelRowsPerStep); - var buffers = new Buffer2D[this.componentProcessors.Length]; for (int i = 0; i < this.componentProcessors.Length; i++) { this.componentProcessors[i].CopyBlocksToColorBuffer(spectralStep); - buffers[i] = this.componentProcessors[i].ColorBuffer; } int width = this.pixelBuffer.Width; @@ -134,7 +163,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder { int y = yy - this.pixelRowCounter; - var values = new JpegColorConverter.ComponentValues(buffers, y); + var values = new JpegColorConverter.ComponentValues(this.componentProcessors, y); this.colorConverter.ConvertToRgbInplace(values); values = values.Slice(0, width); // slice away Jpeg padding @@ -164,5 +193,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder this.pixelRowCounter += this.pixelRowsPerStep; } + + /// + public void Dispose() + { + if (this.componentProcessors != null) + { + foreach (JpegComponentPostProcessor cpp in this.componentProcessors) + { + cpp.Dispose(); + } + } + + this.rgbBuffer?.Dispose(); + this.paddedProxyPixelRow?.Dispose(); + } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index b3cdbf0a0..d71a62ec9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -278,11 +278,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // ReSharper disable once InconsistentNaming int prevDCY = 0; - var pixelConverter = LuminanceForwardConverter.Create(); ImageFrame frame = pixels.Frames.RootFrame; Buffer2D pixelBuffer = frame.PixelBuffer; RowOctet currentRows = default; + var pixelConverter = new LuminanceForwardConverter(frame); + for (int y = 0; y < pixels.Height; y += 8) { cancellationToken.ThrowIfCancellationRequested(); @@ -290,7 +291,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int x = 0; x < pixels.Width; x += 8) { - pixelConverter.Convert(frame, x, y, ref currentRows); + pixelConverter.Convert(x, y, ref currentRows); prevDCY = this.WriteBlock( QuantIndex.Luminance, diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs index fc5b9a868..6c402fcfd 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs @@ -3,6 +3,7 @@ using System; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; @@ -15,39 +16,63 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder internal ref struct LuminanceForwardConverter where TPixel : unmanaged, IPixel { + /// + /// Number of pixels processed per single call + /// + private const int PixelsPerSample = 8 * 8; + /// /// The Y component /// public Block8x8F Y; /// - /// Temporal 8x8 block to hold TPixel data + /// Temporal 64-pixel span to hold unconverted TPixel data. + /// + private readonly Span pixelSpan; + + /// + /// Temporal 64-byte span to hold converted data. + /// + private readonly Span l8Span; + + /// + /// Sampled pixel buffer size. /// - private GenericBlock8x8 pixelBlock; + private readonly Size samplingAreaSize; /// - /// Temporal RGB block + /// for internal operations. /// - private GenericBlock8x8 l8Block; + private readonly Configuration config; - public static LuminanceForwardConverter Create() + public LuminanceForwardConverter(ImageFrame frame) { - var result = default(LuminanceForwardConverter); - return result; + this.Y = default; + + this.pixelSpan = new TPixel[PixelsPerSample].AsSpan(); + this.l8Span = new L8[PixelsPerSample].AsSpan(); + + this.samplingAreaSize = new Size(frame.Width, frame.Height); + this.config = frame.GetConfiguration(); } + /// + /// Gets size of sampling area from given frame pixel buffer. + /// + private static Size SampleSize => new(8, 8); + /// /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure () /// - public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) + public void Convert(int x, int y, ref RowOctet currentRows) { - this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); + YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), SampleSize, this.samplingAreaSize); - Span l8Span = this.l8Block.AsSpanUnsafe(); - PixelOperations.Instance.ToL8(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), l8Span); + PixelOperations.Instance.ToL8(this.config, this.pixelSpan, this.l8Span); ref Block8x8F yBlock = ref this.Y; - ref L8 l8Start = ref l8Span[0]; + ref L8 l8Start = ref MemoryMarshal.GetReference(this.l8Span); for (int i = 0; i < Block8x8F.Size; i++) { diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs index 0be1076e2..789277d7d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs @@ -26,11 +26,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private const int RgbSpanByteSize = PixelsPerSample * 3; - /// - /// of sampling area from given frame pixel buffer. - /// - private static readonly Size SampleSize = new Size(8, 8); - /// /// The Red component. /// @@ -81,6 +76,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder this.config = frame.GetConfiguration(); } + /// + /// Gets size of sampling area from given frame pixel buffer. + /// + private static Size SampleSize => new(8, 8); + /// /// Converts a 8x8 image area inside 'pixels' at position (x, y) to Rgb24. /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs index bfeafcbb3..3a878f3c6 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -25,11 +25,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private const int RgbSpanByteSize = PixelsPerSample * 3; - /// - /// of sampling area from given frame pixel buffer - /// - private static readonly Size SampleSize = new Size(16, 8); - /// /// The left Y component /// @@ -102,6 +97,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } + /// + /// Gets size of sampling area from given frame pixel buffer. + /// + private static Size SampleSize => new(16, 8); + public void Convert(int x, int y, ref RowOctet currentRows, int idx) { YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), SampleSize, this.samplingAreaSize); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index 2dbd1a2dc..5f7725bdd 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -25,11 +25,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private const int RgbSpanByteSize = PixelsPerSample * 3; - /// - /// of sampling area from given frame pixel buffer - /// - private static readonly Size SampleSize = new Size(8, 8); - /// /// The Y component /// @@ -96,6 +91,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } + /// + /// Gets size of sampling area from given frame pixel buffer. + /// + private static Size SampleSize => new(8, 8); + /// /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.Generated.cs b/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.Generated.cs deleted file mode 100644 index 213c48ff3..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.Generated.cs +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -// -namespace SixLabors.ImageSharp.Formats.Jpeg.Components -{ - internal unsafe partial struct GenericBlock8x8 - { - #pragma warning disable 169 - - // It's not allowed use fix-sized buffers with generics, need to place all the fields manually: - private T _y0_x0, _y0_x1, _y0_x2, _y0_x3, _y0_x4, _y0_x5, _y0_x6, _y0_x7; - private T _y1_x0, _y1_x1, _y1_x2, _y1_x3, _y1_x4, _y1_x5, _y1_x6, _y1_x7; - private T _y2_x0, _y2_x1, _y2_x2, _y2_x3, _y2_x4, _y2_x5, _y2_x6, _y2_x7; - private T _y3_x0, _y3_x1, _y3_x2, _y3_x3, _y3_x4, _y3_x5, _y3_x6, _y3_x7; - private T _y4_x0, _y4_x1, _y4_x2, _y4_x3, _y4_x4, _y4_x5, _y4_x6, _y4_x7; - private T _y5_x0, _y5_x1, _y5_x2, _y5_x3, _y5_x4, _y5_x5, _y5_x6, _y5_x7; - private T _y6_x0, _y6_x1, _y6_x2, _y6_x3, _y6_x4, _y6_x5, _y6_x6, _y6_x7; - private T _y7_x0, _y7_x1, _y7_x2, _y7_x3, _y7_x4, _y7_x5, _y7_x6, _y7_x7; - - #pragma warning restore 169 - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.Generated.tt b/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.Generated.tt deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.cs b/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.cs deleted file mode 100644 index 42c01d770..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.cs +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Memory; -using SixLabors.ImageSharp.PixelFormats; - -// ReSharper disable InconsistentNaming -namespace SixLabors.ImageSharp.Formats.Jpeg.Components -{ - /// - /// A generic 8x8 block implementation, useful for manipulating custom 8x8 pixel data. - /// - [StructLayout(LayoutKind.Sequential)] - internal unsafe partial struct GenericBlock8x8 - where T : unmanaged - { - public const int Size = 64; - - /// - /// FOR TESTING ONLY! - /// Gets or sets a value at the given index - /// - /// The index - /// The value - public T this[int idx] - { - get - { - ref T selfRef = ref Unsafe.As, T>(ref this); - return Unsafe.Add(ref selfRef, idx); - } - - set - { - ref T selfRef = ref Unsafe.As, T>(ref this); - Unsafe.Add(ref selfRef, idx) = value; - } - } - - /// - /// FOR TESTING ONLY! - /// Gets or sets a value in a row+column of the 8x8 block - /// - /// The x position index in the row - /// The column index - /// The value - public T this[int x, int y] - { - get => this[(y * 8) + x]; - set => this[(y * 8) + x] = value; - } - - /// - /// Load a 8x8 region of an image into the block. - /// The "outlying" area of the block will be stretched out with pixels on the right and bottom edge of the image. - /// - public void LoadAndStretchEdges(Buffer2D source, int sourceX, int sourceY, ref RowOctet currentRows) - { - int width = Math.Min(8, source.Width - sourceX); - int height = Math.Min(8, source.Height - sourceY); - - if (width <= 0 || height <= 0) - { - return; - } - - uint byteWidth = (uint)width * (uint)Unsafe.SizeOf(); - int remainderXCount = 8 - width; - - ref byte blockStart = ref Unsafe.As, byte>(ref this); - int blockRowSizeInBytes = 8 * Unsafe.SizeOf(); - - for (int y = 0; y < height; y++) - { - Span row = currentRows[y]; - - ref byte s = ref Unsafe.As(ref row[sourceX]); - ref byte d = ref Unsafe.Add(ref blockStart, y * blockRowSizeInBytes); - - Unsafe.CopyBlock(ref d, ref s, byteWidth); - - ref T last = ref Unsafe.Add(ref Unsafe.As(ref d), width - 1); - - for (int x = 1; x <= remainderXCount; x++) - { - Unsafe.Add(ref last, x) = last; - } - } - - int remainderYCount = 8 - height; - - if (remainderYCount == 0) - { - return; - } - - ref byte lastRowStart = ref Unsafe.Add(ref blockStart, (height - 1) * blockRowSizeInBytes); - - for (int y = 1; y <= remainderYCount; y++) - { - ref byte remStart = ref Unsafe.Add(ref lastRowStart, blockRowSizeInBytes * y); - Unsafe.CopyBlock(ref remStart, ref lastRowStart, (uint)blockRowSizeInBytes); - } - } - - /// - /// Only for on-stack instances! - /// - public Span AsSpanUnsafe() - { -#if SUPPORTS_CREATESPAN - Span> s = MemoryMarshal.CreateSpan(ref this, 1); - return MemoryMarshal.Cast, T>(s); -#else - return new Span(Unsafe.AsPointer(ref this), Size); -#endif - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/JpegDecoder.cs b/src/ImageSharp/Formats/Jpeg/JpegDecoder.cs index be03a7e7b..18212ffc7 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegDecoder.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegDecoder.cs @@ -42,7 +42,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg /// public async Task DecodeAsync(Configuration configuration, Stream stream, CancellationToken cancellationToken) - => await this.DecodeAsync(configuration, stream, cancellationToken) + => await this.DecodeAsync(configuration, stream, cancellationToken) .ConfigureAwait(false); /// diff --git a/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs index 73763f4ab..4be6731cc 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs @@ -175,7 +175,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg public Image Decode(BufferedReadStream stream, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - using var spectralConverter = new SpectralConverter(this.Configuration, cancellationToken); + using var spectralConverter = new SpectralConverter(this.Configuration); var scanDecoder = new HuffmanScanDecoder(stream, spectralConverter, cancellationToken); @@ -185,7 +185,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.InitIptcProfile(); this.InitDerivedMetadataProperties(); - return new Image(this.Configuration, spectralConverter.GetPixelBuffer(), this.Metadata); + return new Image( + this.Configuration, + spectralConverter.GetPixelBuffer(cancellationToken), + this.Metadata); } /// diff --git a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/JpegTiffCompression.cs b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/JpegTiffCompression.cs index 2bc606eaf..ce7820ccf 100644 --- a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/JpegTiffCompression.cs +++ b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/JpegTiffCompression.cs @@ -55,17 +55,17 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors { using var jpegDecoder = new JpegDecoderCore(this.configuration, new JpegDecoder()); - // TODO: Should we pass through the CancellationToken from the tiff decoder? // If the PhotometricInterpretation is YCbCr we explicitly assume the JPEG data is in RGB color space. // There seems no other way to determine that the JPEG data is RGB colorspace (no APP14 marker, componentId's are not RGB). using SpectralConverter spectralConverter = this.photometricInterpretation == TiffPhotometricInterpretation.YCbCr ? - new RgbJpegSpectralConverter(this.configuration, CancellationToken.None) : new SpectralConverter(this.configuration, CancellationToken.None); + new RgbJpegSpectralConverter(this.configuration) : new SpectralConverter(this.configuration); var scanDecoder = new HuffmanScanDecoder(stream, spectralConverter, CancellationToken.None); jpegDecoder.LoadTables(this.jpegTables, scanDecoder); scanDecoder.ResetInterval = 0; jpegDecoder.ParseStream(stream, scanDecoder, CancellationToken.None); - CopyImageBytesToBuffer(buffer, spectralConverter.GetPixelBuffer()); + // TODO: Should we pass through the CancellationToken from the tiff decoder? + CopyImageBytesToBuffer(buffer, spectralConverter.GetPixelBuffer(CancellationToken.None)); } else { diff --git a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/RgbJpegSpectralConverter.cs b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/RgbJpegSpectralConverter.cs index aefec7fa3..3b5833c10 100644 --- a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/RgbJpegSpectralConverter.cs +++ b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/RgbJpegSpectralConverter.cs @@ -21,9 +21,8 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors /// This Spectral converter will always convert the pixel data to RGB color. /// /// The configuration. - /// The cancellation token. - public RgbJpegSpectralConverter(Configuration configuration, CancellationToken cancellationToken) - : base(configuration, cancellationToken) + public RgbJpegSpectralConverter(Configuration configuration) + : base(configuration) { } diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index b8986f66f..1b9e1f3ca 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -17,6 +17,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { #if SUPPORTS_RUNTIME_INTRINSICS private static readonly Vector128 Mean16x4Mask = Vector128.Create((short)0x00ff).AsByte(); + + private static readonly Vector128 K1 = Vector128.Create((short)20091); + + private static readonly Vector128 K2 = Vector128.Create((short)-30068); + + private static readonly Vector128 Four = Vector128.Create((short)4); + #endif // Note: method name in libwebp reference implementation is called VP8SSE16x16. @@ -32,15 +39,48 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public static int Vp8_Sse4X4(Span a, Span b) { #if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) + { + // Load values. + ref byte aRef = ref MemoryMarshal.GetReference(a); + ref byte bRef = ref MemoryMarshal.GetReference(b); + var a0 = Vector256.Create( + Unsafe.As>(ref aRef), + Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps))); + var a1 = Vector256.Create( + Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 2)), + Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 3))); + var b0 = Vector256.Create( + Unsafe.As>(ref bRef), + Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps))); + var b1 = Vector256.Create( + Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps * 2)), + Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps * 3))); + + // Combine pair of lines. + Vector256 a01 = Avx2.UnpackLow(a0.AsInt32(), a1.AsInt32()); + Vector256 b01 = Avx2.UnpackLow(b0.AsInt32(), b1.AsInt32()); + + // Convert to 16b. + Vector256 a01s = Avx2.UnpackLow(a01.AsByte(), Vector256.Zero); + Vector256 b01s = Avx2.UnpackLow(b01.AsByte(), Vector256.Zero); + + // subtract, square and accumulate. + Vector256 d0 = Avx2.SubtractSaturate(a01s, b01s); + Vector256 e0 = Avx2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16()); + + return Numerics.ReduceSum(e0); + } + if (Sse2.IsSupported) { // Load values. ref byte aRef = ref MemoryMarshal.GetReference(a); + ref byte bRef = ref MemoryMarshal.GetReference(b); Vector128 a0 = Unsafe.As>(ref aRef); Vector128 a1 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps)); Vector128 a2 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 2)); Vector128 a3 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 3)); - ref byte bRef = ref MemoryMarshal.GetReference(b); Vector128 b0 = Unsafe.As>(ref bRef); Vector128 b1 = Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps)); Vector128 b2 = Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps * 2)); @@ -67,7 +107,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return Numerics.ReduceSum(sum); } - else #endif { return Vp8_SseNxN(a, b, 4, 4); @@ -788,57 +827,325 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } #endif + // Transforms (Paragraph 14.4). + // Does two transforms. public static void TransformTwo(Span src, Span dst, Span scratch) { - TransformOne(src, dst, scratch); - TransformOne(src.Slice(16), dst.Slice(4), scratch); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + // This implementation makes use of 16-bit fixed point versions of two + // multiply constants: + // K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16 + // K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16 + // + // To be able to use signed 16-bit integers, we use the following trick to + // have constants within range: + // - Associated constants are obtained by subtracting the 16-bit fixed point + // version of one: + // k = K - (1 << 16) => K = k + (1 << 16) + // K1 = 85267 => k1 = 20091 + // K2 = 35468 => k2 = -30068 + // - The multiplication of a variable by a constant become the sum of the + // variable and the multiplication of that variable by the associated + // constant: + // (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x + + // Load and concatenate the transform coefficients (we'll do two transforms + // in parallel). + ref short srcRef = ref MemoryMarshal.GetReference(src); + var in0 = Vector128.Create(Unsafe.As(ref srcRef), 0); + var in1 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, 4)), 0); + var in2 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, 8)), 0); + var in3 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, 12)), 0); + + // a00 a10 a20 a30 x x x x + // a01 a11 a21 a31 x x x x + // a02 a12 a22 a32 x x x x + // a03 a13 a23 a33 x x x x + var inb0 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, 16)), 0); + var inb1 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, 20)), 0); + var inb2 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, 24)), 0); + var inb3 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, 28)), 0); + + in0 = Sse2.UnpackLow(in0, inb0); + in1 = Sse2.UnpackLow(in1, inb1); + in2 = Sse2.UnpackLow(in2, inb2); + in3 = Sse2.UnpackLow(in3, inb3); + + // a00 a10 a20 a30 b00 b10 b20 b30 + // a01 a11 a21 a31 b01 b11 b21 b31 + // a02 a12 a22 a32 b02 b12 b22 b32 + // a03 a13 a23 a33 b03 b13 b23 b33 + + // Vertical pass and subsequent transpose. + // First pass, c and d calculations are longer because of the "trick" multiplications. + Vector128 a = Sse2.Add(in0.AsInt16(), in2.AsInt16()); + Vector128 b = Sse2.Subtract(in0.AsInt16(), in2.AsInt16()); + + // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3 + Vector128 c1 = Sse2.MultiplyHigh(in1.AsInt16(), K2); + Vector128 c2 = Sse2.MultiplyHigh(in3.AsInt16(), K1); + Vector128 c3 = Sse2.Subtract(in1.AsInt16(), in3.AsInt16()); + Vector128 c4 = Sse2.Subtract(c1, c2); + Vector128 c = Sse2.Add(c3.AsInt16(), c4); + + // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3 + Vector128 d1 = Sse2.MultiplyHigh(in1.AsInt16(), K1); + Vector128 d2 = Sse2.MultiplyHigh(in3.AsInt16(), K2); + Vector128 d3 = Sse2.Add(in1.AsInt16(), in3.AsInt16()); + Vector128 d4 = Sse2.Add(d1, d2); + Vector128 d = Sse2.Add(d3, d4); + + // Second pass. + Vector128 tmp0 = Sse2.Add(a.AsInt16(), d); + Vector128 tmp1 = Sse2.Add(b.AsInt16(), c); + Vector128 tmp2 = Sse2.Subtract(b.AsInt16(), c); + Vector128 tmp3 = Sse2.Subtract(a.AsInt16(), d); + + // Transpose the two 4x4. + Vp8Transpose_2_4x4_16b(tmp0, tmp1, tmp2, tmp3, out Vector128 t0, out Vector128 t1, out Vector128 t2, out Vector128 t3); + + // Horizontal pass and subsequent transpose. + // First pass, c and d calculations are longer because of the "trick" multiplications. + Vector128 dc = Sse2.Add(t0.AsInt16(), Four); + a = Sse2.Add(dc, t2.AsInt16()); + b = Sse2.Subtract(dc, t2.AsInt16()); + + // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3 + c1 = Sse2.MultiplyHigh(t1.AsInt16(), K2); + c2 = Sse2.MultiplyHigh(t3.AsInt16(), K1); + c3 = Sse2.Subtract(t1.AsInt16(), t3.AsInt16()); + c4 = Sse2.Subtract(c1, c2); + c = Sse2.Add(c3, c4); + + // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3 + d1 = Sse2.MultiplyHigh(t1.AsInt16(), K1); + d2 = Sse2.MultiplyHigh(t3.AsInt16(), K2); + d3 = Sse2.Add(t1.AsInt16(), t3.AsInt16()); + d4 = Sse2.Add(d1, d2); + d = Sse2.Add(d3, d4); + + // Second pass. + tmp0 = Sse2.Add(a, d); + tmp1 = Sse2.Add(b, c); + tmp2 = Sse2.Subtract(b, c); + tmp3 = Sse2.Subtract(a, d); + Vector128 shifted0 = Sse2.ShiftRightArithmetic(tmp0, 3); + Vector128 shifted1 = Sse2.ShiftRightArithmetic(tmp1, 3); + Vector128 shifted2 = Sse2.ShiftRightArithmetic(tmp2, 3); + Vector128 shifted3 = Sse2.ShiftRightArithmetic(tmp3, 3); + + // Transpose the two 4x4. + Vp8Transpose_2_4x4_16b(shifted0, shifted1, shifted2, shifted3, out t0, out t1, out t2, out t3); + + // Add inverse transform to 'dst' and store. + // Load the reference(s). + // Load eight bytes/pixels per line. + ref byte dstRef = ref MemoryMarshal.GetReference(dst); + Vector128 dst0 = Vector128.Create(Unsafe.As(ref dstRef), 0).AsByte(); + Vector128 dst1 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref dstRef, WebpConstants.Bps)), 0).AsByte(); + Vector128 dst2 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref dstRef, WebpConstants.Bps * 2)), 0).AsByte(); + Vector128 dst3 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref dstRef, WebpConstants.Bps * 3)), 0).AsByte(); + + // Convert to 16b. + dst0 = Sse2.UnpackLow(dst0, Vector128.Zero); + dst1 = Sse2.UnpackLow(dst1, Vector128.Zero); + dst2 = Sse2.UnpackLow(dst2, Vector128.Zero); + dst3 = Sse2.UnpackLow(dst3, Vector128.Zero); + + // Add the inverse transform(s). + dst0 = Sse2.Add(dst0.AsInt16(), t0.AsInt16()).AsByte(); + dst1 = Sse2.Add(dst1.AsInt16(), t1.AsInt16()).AsByte(); + dst2 = Sse2.Add(dst2.AsInt16(), t2.AsInt16()).AsByte(); + dst3 = Sse2.Add(dst3.AsInt16(), t3.AsInt16()).AsByte(); + + // Unsigned saturate to 8b. + dst0 = Sse2.PackUnsignedSaturate(dst0.AsInt16(), dst0.AsInt16()); + dst1 = Sse2.PackUnsignedSaturate(dst1.AsInt16(), dst1.AsInt16()); + dst2 = Sse2.PackUnsignedSaturate(dst2.AsInt16(), dst2.AsInt16()); + dst3 = Sse2.PackUnsignedSaturate(dst3.AsInt16(), dst3.AsInt16()); + + // Store the results. + // Store eight bytes/pixels per line. + ref byte outputRef = ref MemoryMarshal.GetReference(dst); + Unsafe.As>(ref outputRef) = dst0.GetLower(); + Unsafe.As>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps)) = dst1.GetLower(); + Unsafe.As>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 2)) = dst2.GetLower(); + Unsafe.As>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 3)) = dst3.GetLower(); + } + else +#endif + { + TransformOne(src, dst, scratch); + TransformOne(src.Slice(16), dst.Slice(4), scratch); + } } public static void TransformOne(Span src, Span dst, Span scratch) { - Span tmp = scratch.Slice(0, 16); - int tmpOffset = 0; - for (int srcOffset = 0; srcOffset < 4; srcOffset++) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) { - // vertical pass - int srcOffsetPlus4 = srcOffset + 4; - int srcOffsetPlus8 = srcOffset + 8; - int srcOffsetPlus12 = srcOffset + 12; - int a = src[srcOffset] + src[srcOffsetPlus8]; - int b = src[srcOffset] - src[srcOffsetPlus8]; - int c = Mul2(src[srcOffsetPlus4]) - Mul1(src[srcOffsetPlus12]); - int d = Mul1(src[srcOffsetPlus4]) + Mul2(src[srcOffsetPlus12]); - tmp[tmpOffset++] = a + d; - tmp[tmpOffset++] = b + c; - tmp[tmpOffset++] = b - c; - tmp[tmpOffset++] = a - d; - } + // Load and concatenate the transform coefficients. + ref short srcRef = ref MemoryMarshal.GetReference(src); + var in0 = Vector128.Create(Unsafe.As(ref srcRef), 0); + var in1 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, 4)), 0); + var in2 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, 8)), 0); + var in3 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, 12)), 0); + + // a00 a10 a20 a30 x x x x + // a01 a11 a21 a31 x x x x + // a02 a12 a22 a32 x x x x + // a03 a13 a23 a33 x x x x + + // Vertical pass and subsequent transpose. + // First pass, c and d calculations are longer because of the "trick" multiplications. + Vector128 a = Sse2.Add(in0.AsInt16(), in2.AsInt16()); + Vector128 b = Sse2.Subtract(in0.AsInt16(), in2.AsInt16()); + + // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3 + Vector128 c1 = Sse2.MultiplyHigh(in1.AsInt16(), K2); + Vector128 c2 = Sse2.MultiplyHigh(in3.AsInt16(), K1); + Vector128 c3 = Sse2.Subtract(in1.AsInt16(), in3.AsInt16()); + Vector128 c4 = Sse2.Subtract(c1, c2); + Vector128 c = Sse2.Add(c3.AsInt16(), c4); + + // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3 + Vector128 d1 = Sse2.MultiplyHigh(in1.AsInt16(), K1); + Vector128 d2 = Sse2.MultiplyHigh(in3.AsInt16(), K2); + Vector128 d3 = Sse2.Add(in1.AsInt16(), in3.AsInt16()); + Vector128 d4 = Sse2.Add(d1, d2); + Vector128 d = Sse2.Add(d3, d4); + + // Second pass. + Vector128 tmp0 = Sse2.Add(a.AsInt16(), d); + Vector128 tmp1 = Sse2.Add(b.AsInt16(), c); + Vector128 tmp2 = Sse2.Subtract(b.AsInt16(), c); + Vector128 tmp3 = Sse2.Subtract(a.AsInt16(), d); + + // Transpose the two 4x4. + Vp8Transpose_2_4x4_16b(tmp0, tmp1, tmp2, tmp3, out Vector128 t0, out Vector128 t1, out Vector128 t2, out Vector128 t3); + + // Horizontal pass and subsequent transpose. + // First pass, c and d calculations are longer because of the "trick" multiplications. + Vector128 dc = Sse2.Add(t0.AsInt16(), Four); + a = Sse2.Add(dc, t2.AsInt16()); + b = Sse2.Subtract(dc, t2.AsInt16()); + + // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3 + c1 = Sse2.MultiplyHigh(t1.AsInt16(), K2); + c2 = Sse2.MultiplyHigh(t3.AsInt16(), K1); + c3 = Sse2.Subtract(t1.AsInt16(), t3.AsInt16()); + c4 = Sse2.Subtract(c1, c2); + c = Sse2.Add(c3, c4); + + // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3 + d1 = Sse2.MultiplyHigh(t1.AsInt16(), K1); + d2 = Sse2.MultiplyHigh(t3.AsInt16(), K2); + d3 = Sse2.Add(t1.AsInt16(), t3.AsInt16()); + d4 = Sse2.Add(d1, d2); + d = Sse2.Add(d3, d4); + + // Second pass. + tmp0 = Sse2.Add(a, d); + tmp1 = Sse2.Add(b, c); + tmp2 = Sse2.Subtract(b, c); + tmp3 = Sse2.Subtract(a, d); + Vector128 shifted0 = Sse2.ShiftRightArithmetic(tmp0, 3); + Vector128 shifted1 = Sse2.ShiftRightArithmetic(tmp1, 3); + Vector128 shifted2 = Sse2.ShiftRightArithmetic(tmp2, 3); + Vector128 shifted3 = Sse2.ShiftRightArithmetic(tmp3, 3); + + // Transpose the two 4x4. + Vp8Transpose_2_4x4_16b(shifted0, shifted1, shifted2, shifted3, out t0, out t1, out t2, out t3); + + // Add inverse transform to 'dst' and store. + // Load the reference(s). + // Load four bytes/pixels per line. + ref byte dstRef = ref MemoryMarshal.GetReference(dst); + Vector128 dst0 = Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref dstRef)).AsByte(); + Vector128 dst1 = Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref Unsafe.Add(ref dstRef, WebpConstants.Bps))).AsByte(); + Vector128 dst2 = Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref Unsafe.Add(ref dstRef, WebpConstants.Bps * 2))).AsByte(); + Vector128 dst3 = Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref Unsafe.Add(ref dstRef, WebpConstants.Bps * 3))).AsByte(); - // Each pass is expanding the dynamic range by ~3.85 (upper bound). - // The exact value is (2. + (20091 + 35468) / 65536). - // After the second pass, maximum interval is [-3794, 3794], assuming - // an input in [-2048, 2047] interval. We then need to add a dst value in the [0, 255] range. - // In the worst case scenario, the input to clip_8b() can be as large as [-60713, 60968]. - tmpOffset = 0; - int dstOffset = 0; - for (int i = 0; i < 4; i++) + // Convert to 16b. + dst0 = Sse2.UnpackLow(dst0, Vector128.Zero); + dst1 = Sse2.UnpackLow(dst1, Vector128.Zero); + dst2 = Sse2.UnpackLow(dst2, Vector128.Zero); + dst3 = Sse2.UnpackLow(dst3, Vector128.Zero); + + // Add the inverse transform(s). + dst0 = Sse2.Add(dst0.AsInt16(), t0.AsInt16()).AsByte(); + dst1 = Sse2.Add(dst1.AsInt16(), t1.AsInt16()).AsByte(); + dst2 = Sse2.Add(dst2.AsInt16(), t2.AsInt16()).AsByte(); + dst3 = Sse2.Add(dst3.AsInt16(), t3.AsInt16()).AsByte(); + + // Unsigned saturate to 8b. + dst0 = Sse2.PackUnsignedSaturate(dst0.AsInt16(), dst0.AsInt16()); + dst1 = Sse2.PackUnsignedSaturate(dst1.AsInt16(), dst1.AsInt16()); + dst2 = Sse2.PackUnsignedSaturate(dst2.AsInt16(), dst2.AsInt16()); + dst3 = Sse2.PackUnsignedSaturate(dst3.AsInt16(), dst3.AsInt16()); + + // Store the results. + // Store four bytes/pixels per line. + ref byte outputRef = ref MemoryMarshal.GetReference(dst); + int output0 = Sse2.ConvertToInt32(dst0.AsInt32()); + int output1 = Sse2.ConvertToInt32(dst1.AsInt32()); + int output2 = Sse2.ConvertToInt32(dst2.AsInt32()); + int output3 = Sse2.ConvertToInt32(dst3.AsInt32()); + Unsafe.As(ref outputRef) = output0; + Unsafe.As(ref Unsafe.Add(ref outputRef, WebpConstants.Bps)) = output1; + Unsafe.As(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 2)) = output2; + Unsafe.As(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 3)) = output3; + } + else +#endif { - // horizontal pass - int tmpOffsetPlus4 = tmpOffset + 4; - int tmpOffsetPlus8 = tmpOffset + 8; - int tmpOffsetPlus12 = tmpOffset + 12; - int dc = tmp[tmpOffset] + 4; - int a = dc + tmp[tmpOffsetPlus8]; - int b = dc - tmp[tmpOffsetPlus8]; - int c = Mul2(tmp[tmpOffsetPlus4]) - Mul1(tmp[tmpOffsetPlus12]); - int d = Mul1(tmp[tmpOffsetPlus4]) + Mul2(tmp[tmpOffsetPlus12]); - Store(dst.Slice(dstOffset), 0, 0, a + d); - Store(dst.Slice(dstOffset), 1, 0, b + c); - Store(dst.Slice(dstOffset), 2, 0, b - c); - Store(dst.Slice(dstOffset), 3, 0, a - d); - tmpOffset++; - - dstOffset += WebpConstants.Bps; + Span tmp = scratch.Slice(0, 16); + int tmpOffset = 0; + for (int srcOffset = 0; srcOffset < 4; srcOffset++) + { + // vertical pass + int srcOffsetPlus4 = srcOffset + 4; + int srcOffsetPlus8 = srcOffset + 8; + int srcOffsetPlus12 = srcOffset + 12; + int a = src[srcOffset] + src[srcOffsetPlus8]; + int b = src[srcOffset] - src[srcOffsetPlus8]; + int c = Mul2(src[srcOffsetPlus4]) - Mul1(src[srcOffsetPlus12]); + int d = Mul1(src[srcOffsetPlus4]) + Mul2(src[srcOffsetPlus12]); + tmp[tmpOffset++] = a + d; + tmp[tmpOffset++] = b + c; + tmp[tmpOffset++] = b - c; + tmp[tmpOffset++] = a - d; + } + + // Each pass is expanding the dynamic range by ~3.85 (upper bound). + // The exact value is (2. + (20091 + 35468) / 65536). + // After the second pass, maximum interval is [-3794, 3794], assuming + // an input in [-2048, 2047] interval. We then need to add a dst value in the [0, 255] range. + // In the worst case scenario, the input to clip_8b() can be as large as [-60713, 60968]. + tmpOffset = 0; + int dstOffset = 0; + for (int i = 0; i < 4; i++) + { + // horizontal pass + int tmpOffsetPlus4 = tmpOffset + 4; + int tmpOffsetPlus8 = tmpOffset + 8; + int tmpOffsetPlus12 = tmpOffset + 12; + int dc = tmp[tmpOffset] + 4; + int a = dc + tmp[tmpOffsetPlus8]; + int b = dc - tmp[tmpOffsetPlus8]; + int c = Mul2(tmp[tmpOffsetPlus4]) - Mul1(tmp[tmpOffsetPlus12]); + int d = Mul1(tmp[tmpOffsetPlus4]) + Mul2(tmp[tmpOffsetPlus12]); + Store(dst.Slice(dstOffset), 0, 0, a + d); + Store(dst.Slice(dstOffset), 1, 0, b + c); + Store(dst.Slice(dstOffset), 2, 0, b - c); + Store(dst.Slice(dstOffset), 3, 0, a - d); + tmpOffset++; + + dstOffset += WebpConstants.Bps; + } } } diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 2fcea8cee..2a40cffdc 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -25,6 +25,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy #if SUPPORTS_RUNTIME_INTRINSICS private static readonly Vector128 MaxCoeff2047 = Vector128.Create((short)MaxLevel); + private static readonly Vector256 MaxCoeff2047Vec256 = Vector256.Create((short)MaxLevel); + + private static readonly Vector256 Cst256 = Vector256.Create(0, 1, 2, 3, 8, 9, 254, 255, 10, 11, 4, 5, 6, 7, 12, 13, 2, 3, 8, 9, 10, 11, 4, 5, 254, 255, 6, 7, 12, 13, 14, 15); + + private static readonly Vector256 Cst78 = Vector256.Create(254, 255, 254, 255, 254, 255, 254, 255, 14, 15, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 0, 1, 254, 255, 254, 255, 254, 255, 254, 255); + private static readonly Vector128 CstLo = Vector128.Create(0, 1, 2, 3, 8, 9, 254, 255, 10, 11, 4, 5, 6, 7, 12, 13); private static readonly Vector128 Cst7 = Vector128.Create(254, 255, 254, 255, 254, 255, 254, 255, 14, 15, 254, 255, 254, 255, 254, 255); @@ -329,7 +335,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy LossyUtils.TransformWht(dcTmp, tmp, scratch); for (n = 0; n < 16; n += 2) { - Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), scratch); + Vp8Encoding.ITransformTwo(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), scratch); } return nz; @@ -375,7 +381,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy for (n = 0; n < 8; n += 2) { - Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), scratch); + Vp8Encoding.ITransformTwo(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), scratch); } return nz << 16; @@ -531,7 +537,70 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public static int QuantizeBlock(Span input, Span output, ref Vp8Matrix mtx) { #if SUPPORTS_RUNTIME_INTRINSICS - if (Sse41.IsSupported) + if (Avx2.IsSupported) + { + // Load all inputs. + Vector256 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); + Vector256 iq0 = Unsafe.As>(ref mtx.IQ[0]); + Vector256 q0 = Unsafe.As>(ref mtx.Q[0]); + + // coeff = abs(in) + Vector256 coeff0 = Avx2.Abs(input0); + + // coeff = abs(in) + sharpen + Vector256 sharpen0 = Unsafe.As>(ref mtx.Sharpen[0]); + Avx2.Add(coeff0.AsInt16(), sharpen0); + + // out = (coeff * iQ + B) >> QFIX + // doing calculations with 32b precision (QFIX=17) + // out = (coeff * iQ) + Vector256 coeffiQ0H = Avx2.MultiplyHigh(coeff0, iq0); + Vector256 coeffiQ0L = Avx2.MultiplyLow(coeff0, iq0); + Vector256 out00 = Avx2.UnpackLow(coeffiQ0L, coeffiQ0H); + Vector256 out08 = Avx2.UnpackHigh(coeffiQ0L, coeffiQ0H); + + // out = (coeff * iQ + B) + Vector256 bias00 = Unsafe.As>(ref mtx.Bias[0]); + Vector256 bias08 = Unsafe.As>(ref mtx.Bias[8]); + out00 = Avx2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); + out08 = Avx2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); + + // out = QUANTDIV(coeff, iQ, B, QFIX) + out00 = Avx2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16(); + out08 = Avx2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16(); + + // Pack result as 16b. + Vector256 out0 = Avx2.PackSignedSaturate(out00.AsInt32(), out08.AsInt32()); + + // if (coeff > 2047) coeff = 2047 + out0 = Avx2.Min(out0, MaxCoeff2047Vec256); + + // Put the sign back. + out0 = Avx2.Sign(out0, input0); + + // in = out * Q + input0 = Avx2.MultiplyLow(out0, q0.AsInt16()); + ref short inputRef = ref MemoryMarshal.GetReference(input); + Unsafe.As>(ref inputRef) = input0; + + // zigzag the output before storing it. + Vector256 tmp256 = Avx2.Shuffle(out0.AsByte(), Cst256); + Vector256 tmp78 = Avx2.Shuffle(out0.AsByte(), Cst78); + + // Reverse the order of the 16-byte lanes. + Vector256 tmp87 = Avx2.Permute2x128(tmp78, tmp78, 1); + Vector256 outZ = Avx2.Or(tmp256, tmp87).AsInt16(); + + ref short outputRef = ref MemoryMarshal.GetReference(output); + Unsafe.As>(ref outputRef) = outZ; + + Vector256 packedOutput = Avx2.PackSignedSaturate(outZ, outZ); + + // Detect if all 'out' values are zeros or not. + Vector256 cmpeq = Avx2.CompareEqual(packedOutput, Vector256.Zero); + return Avx2.MoveMask(cmpeq) != -1 ? 1 : 0; + } + else if (Sse41.IsSupported) { // Load all inputs. Vector128 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); @@ -579,7 +648,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16(); out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16(); - // pack result as 16b + // Pack result as 16b. Vector128 out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32()); Vector128 out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32()); @@ -587,7 +656,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy out0 = Sse2.Min(out0, MaxCoeff2047); out8 = Sse2.Min(out8, MaxCoeff2047); - // put sign back + // Put the sign back. out0 = Ssse3.Sign(out0, input0); out8 = Ssse3.Sign(out8, input8); @@ -726,7 +795,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { uint v = src[0] * 0x01010101u; Span vSpan = BitConverter.GetBytes(v).AsSpan(); - for (int i = 0; i < 16; i++) + for (nint i = 0; i < 16; i++) { if (!src.Slice(0, 4).SequenceEqual(vSpan) || !src.Slice(4, 4).SequenceEqual(vSpan) || !src.Slice(8, 4).SequenceEqual(vSpan) || !src.Slice(12, 4).SequenceEqual(vSpan)) @@ -744,19 +813,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy private static bool IsFlat(Span levels, int numBlocks, int thresh) { int score = 0; + ref short levelsRef = ref MemoryMarshal.GetReference(levels); + int offset = 0; while (numBlocks-- > 0) { - for (int i = 1; i < 16; i++) + for (nint i = 1; i < 16; i++) { // omit DC, we're only interested in AC - score += levels[i] != 0 ? 1 : 0; + score += Unsafe.Add(ref levelsRef, offset) != 0 ? 1 : 0; if (score > thresh) { return false; } } - levels = levels.Slice(16); + offset += 16; } return true; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs index fcd61f2c0..eb8d92110 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs @@ -358,7 +358,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int kThreshold = 8 + ((17 - 8) * q / 100); int k; Span dc = stackalloc uint[16]; - Span tmp = stackalloc ushort[16]; uint m; uint m2; for (k = 0; k < 16; k += 4) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs index aa4ab5767..65b1d07d3 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs @@ -15,7 +15,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// /// Methods for encoding a VP8 frame. /// - internal static class Vp8Encoding + internal static unsafe class Vp8Encoding { private const int KC1 = 20091 + (1 << 16); @@ -66,11 +66,39 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public static readonly int[] Vp8I4ModeOffsets = { I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4 }; #if SUPPORTS_RUNTIME_INTRINSICS - public static readonly Vector128 K1 = Vector128.Create((short)20091).AsInt16(); +#pragma warning disable SA1310 // Field names should not contain underscore + private static readonly Vector128 K1 = Vector128.Create((short)20091).AsInt16(); - public static readonly Vector128 K2 = Vector128.Create((short)-30068).AsInt16(); + private static readonly Vector128 K2 = Vector128.Create((short)-30068).AsInt16(); - public static readonly Vector128 Four = Vector128.Create((short)4); + private static readonly Vector128 Four = Vector128.Create((short)4); + + private static readonly Vector128 Seven = Vector128.Create((short)7); + + private static readonly Vector128 K88p = Vector128.Create(8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0).AsInt16(); + + private static readonly Vector128 K88m = Vector128.Create(8, 0, 248, 255, 8, 0, 248, 255, 8, 0, 248, 255, 8, 0, 248, 255).AsInt16(); + + private static readonly Vector128 K5352_2217p = Vector128.Create(232, 20, 169, 8, 232, 20, 169, 8, 232, 20, 169, 8, 232, 20, 169, 8).AsInt16(); + + private static readonly Vector128 K5352_2217m = Vector128.Create(169, 8, 24, 235, 169, 8, 24, 235, 169, 8, 24, 235, 169, 8, 24, 235).AsInt16(); + + private static readonly Vector128 K937 = Vector128.Create(937); + + private static readonly Vector128 K1812 = Vector128.Create(1812); + + private static readonly Vector128 K5352_2217 = Vector128.Create(169, 8, 232, 20, 169, 8, 232, 20, 169, 8, 232, 20, 169, 8, 232, 20).AsInt16(); + + private static readonly Vector128 K2217_5352 = Vector128.Create(24, 235, 169, 8, 24, 235, 169, 8, 24, 235, 169, 8, 24, 235, 169, 8).AsInt16(); + + private static readonly Vector128 K12000PlusOne = Vector128.Create(12000 + (1 << 16)); + + private static readonly Vector128 K51000 = Vector128.Create(51000); + + private static readonly byte MmShuffle2301 = SimdUtils.Shuffle.MmShuffle(2, 3, 0, 1); + + private static readonly byte MmShuffle1032 = SimdUtils.Shuffle.MmShuffle(1, 0, 3, 2); +#pragma warning restore SA1310 // Field names should not contain underscore #endif static Vp8Encoding() @@ -83,7 +111,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Transforms (Paragraph 14.4) // Does two inverse transforms. - public static void ITransform(Span reference, Span input, Span dst, Span scratch) + public static void ITransformTwo(Span reference, Span input, Span dst, Span scratch) { #if SUPPORTS_RUNTIME_INTRINSICS if (Sse2.IsSupported) @@ -180,10 +208,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy ref2 = Sse2.PackUnsignedSaturate(ref2InvAdded, ref2InvAdded); ref3 = Sse2.PackUnsignedSaturate(ref3InvAdded, ref3InvAdded); - // Unsigned saturate to 8b. - ref byte outputRef = ref MemoryMarshal.GetReference(dst); - // Store eight bytes/pixels per line. + ref byte outputRef = ref MemoryMarshal.GetReference(dst); Unsafe.As>(ref outputRef) = ref0.GetLower(); Unsafe.As>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps)) = ref1.GetLower(); Unsafe.As>(ref Unsafe.Add(ref outputRef, WebpConstants.Bps * 2)) = ref2.GetLower(); @@ -376,49 +402,246 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public static void FTransform2(Span src, Span reference, Span output, Span output2, Span scratch) { - FTransform(src, reference, output, scratch); - FTransform(src.Slice(4), reference.Slice(4), output2, scratch); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + ref byte srcRef = ref MemoryMarshal.GetReference(src); + ref byte referenceRef = ref MemoryMarshal.GetReference(reference); + + // Load src. + var src0 = Vector128.Create(Unsafe.As(ref srcRef), 0); + var src1 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, WebpConstants.Bps)), 0); + var src2 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, WebpConstants.Bps * 2)), 0); + var src3 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, WebpConstants.Bps * 3)), 0); + + // Load ref. + var ref0 = Vector128.Create(Unsafe.As(ref referenceRef), 0); + var ref1 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps)), 0); + var ref2 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 2)), 0); + var ref3 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 3)), 0); + + // Convert both to 16 bit. + Vector128 srcLow0 = Sse2.UnpackLow(src0.AsByte(), Vector128.Zero); + Vector128 srcLow1 = Sse2.UnpackLow(src1.AsByte(), Vector128.Zero); + Vector128 srcLow2 = Sse2.UnpackLow(src2.AsByte(), Vector128.Zero); + Vector128 srcLow3 = Sse2.UnpackLow(src3.AsByte(), Vector128.Zero); + Vector128 refLow0 = Sse2.UnpackLow(ref0.AsByte(), Vector128.Zero); + Vector128 refLow1 = Sse2.UnpackLow(ref1.AsByte(), Vector128.Zero); + Vector128 refLow2 = Sse2.UnpackLow(ref2.AsByte(), Vector128.Zero); + Vector128 refLow3 = Sse2.UnpackLow(ref3.AsByte(), Vector128.Zero); + + // Compute difference. -> 00 01 02 03 00' 01' 02' 03' + Vector128 diff0 = Sse2.Subtract(srcLow0.AsInt16(), refLow0.AsInt16()); + Vector128 diff1 = Sse2.Subtract(srcLow1.AsInt16(), refLow1.AsInt16()); + Vector128 diff2 = Sse2.Subtract(srcLow2.AsInt16(), refLow2.AsInt16()); + Vector128 diff3 = Sse2.Subtract(srcLow3.AsInt16(), refLow3.AsInt16()); + + // Unpack and shuffle. + // 00 01 02 03 0 0 0 0 + // 10 11 12 13 0 0 0 0 + // 20 21 22 23 0 0 0 0 + // 30 31 32 33 0 0 0 0 + Vector128 shuf01l = Sse2.UnpackLow(diff0.AsInt32(), diff1.AsInt32()); + Vector128 shuf23l = Sse2.UnpackLow(diff2.AsInt32(), diff3.AsInt32()); + Vector128 shuf01h = Sse2.UnpackHigh(diff0.AsInt32(), diff1.AsInt32()); + Vector128 shuf23h = Sse2.UnpackHigh(diff2.AsInt32(), diff3.AsInt32()); + + // First pass. + FTransformPass1SSE2(shuf01l.AsInt16(), shuf23l.AsInt16(), out Vector128 v01l, out Vector128 v32l); + FTransformPass1SSE2(shuf01h.AsInt16(), shuf23h.AsInt16(), out Vector128 v01h, out Vector128 v32h); + + // Second pass. + FTransformPass2SSE2(v01l, v32l, output); + FTransformPass2SSE2(v01h, v32h, output2); + } + else +#endif + { + FTransform(src, reference, output, scratch); + FTransform(src.Slice(4), reference.Slice(4), output2, scratch); + } } public static void FTransform(Span src, Span reference, Span output, Span scratch) { - int i; - Span tmp = scratch.Slice(0, 16); - - int srcIdx = 0; - int refIdx = 0; - for (i = 0; i < 4; i++) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) { - int d0 = src[srcIdx] - reference[refIdx]; // 9bit dynamic range ([-255,255]) - int d1 = src[srcIdx + 1] - reference[refIdx + 1]; - int d2 = src[srcIdx + 2] - reference[refIdx + 2]; - int d3 = src[srcIdx + 3] - reference[refIdx + 3]; - int a0 = d0 + d3; // 10b [-510,510] - int a1 = d1 + d2; - int a2 = d1 - d2; - int a3 = d0 - d3; - tmp[0 + (i * 4)] = (a0 + a1) * 8; // 14b [-8160,8160] - tmp[1 + (i * 4)] = ((a2 * 2217) + (a3 * 5352) + 1812) >> 9; // [-7536,7542] - tmp[2 + (i * 4)] = (a0 - a1) * 8; - tmp[3 + (i * 4)] = ((a3 * 2217) - (a2 * 5352) + 937) >> 9; - - srcIdx += WebpConstants.Bps; - refIdx += WebpConstants.Bps; - } + ref byte srcRef = ref MemoryMarshal.GetReference(src); + ref byte referenceRef = ref MemoryMarshal.GetReference(reference); - for (i = 0; i < 4; i++) + // Load src. + var src0 = Vector128.Create(Unsafe.As(ref srcRef), 0); + var src1 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, WebpConstants.Bps)), 0); + var src2 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, WebpConstants.Bps * 2)), 0); + var src3 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, WebpConstants.Bps * 3)), 0); + + // Load ref. + var ref0 = Vector128.Create(Unsafe.As(ref referenceRef), 0); + var ref1 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps)), 0); + var ref2 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 2)), 0); + var ref3 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref referenceRef, WebpConstants.Bps * 3)), 0); + + // 00 01 02 03 * + // 10 11 12 13 * + // 20 21 22 23 * + // 30 31 32 33 * + // Shuffle. + Vector128 srcLow0 = Sse2.UnpackLow(src0.AsInt16(), src1.AsInt16()); + Vector128 srcLow1 = Sse2.UnpackLow(src2.AsInt16(), src3.AsInt16()); + Vector128 refLow0 = Sse2.UnpackLow(ref0.AsInt16(), ref1.AsInt16()); + Vector128 refLow1 = Sse2.UnpackLow(ref2.AsInt16(), ref3.AsInt16()); + + // 00 01 10 11 02 03 12 13 * * ... + // 20 21 30 31 22 22 32 33 * * ... + + // Convert both to 16 bit. + Vector128 src0_16b = Sse2.UnpackLow(srcLow0.AsByte(), Vector128.Zero); + Vector128 src1_16b = Sse2.UnpackLow(srcLow1.AsByte(), Vector128.Zero); + Vector128 ref0_16b = Sse2.UnpackLow(refLow0.AsByte(), Vector128.Zero); + Vector128 ref1_16b = Sse2.UnpackLow(refLow1.AsByte(), Vector128.Zero); + + // Compute the difference. + Vector128 row01 = Sse2.Subtract(src0_16b.AsInt16(), ref0_16b.AsInt16()); + Vector128 row23 = Sse2.Subtract(src1_16b.AsInt16(), ref1_16b.AsInt16()); + + // First pass. + FTransformPass1SSE2(row01, row23, out Vector128 v01, out Vector128 v32); + + // Second pass. + FTransformPass2SSE2(v01, v32, output); + } + else +#endif { - int a0 = tmp[0 + i] + tmp[12 + i]; // 15b - int a1 = tmp[4 + i] + tmp[8 + i]; - int a2 = tmp[4 + i] - tmp[8 + i]; - int a3 = tmp[0 + i] - tmp[12 + i]; - output[0 + i] = (short)((a0 + a1 + 7) >> 4); // 12b - output[4 + i] = (short)((((a2 * 2217) + (a3 * 5352) + 12000) >> 16) + (a3 != 0 ? 1 : 0)); - output[8 + i] = (short)((a0 - a1 + 7) >> 4); - output[12 + i] = (short)(((a3 * 2217) - (a2 * 5352) + 51000) >> 16); + int i; + Span tmp = scratch.Slice(0, 16); + + int srcIdx = 0; + int refIdx = 0; + for (i = 0; i < 4; i++) + { + int d3 = src[srcIdx + 3] - reference[refIdx + 3]; + int d2 = src[srcIdx + 2] - reference[refIdx + 2]; + int d1 = src[srcIdx + 1] - reference[refIdx + 1]; + int d0 = src[srcIdx] - reference[refIdx]; // 9bit dynamic range ([-255,255]) + int a0 = d0 + d3; // 10b [-510,510] + int a1 = d1 + d2; + int a2 = d1 - d2; + int a3 = d0 - d3; + tmp[3 + (i * 4)] = ((a3 * 2217) - (a2 * 5352) + 937) >> 9; + tmp[2 + (i * 4)] = (a0 - a1) * 8; + tmp[1 + (i * 4)] = ((a2 * 2217) + (a3 * 5352) + 1812) >> 9; // [-7536,7542] + tmp[0 + (i * 4)] = (a0 + a1) * 8; // 14b [-8160,8160] + + srcIdx += WebpConstants.Bps; + refIdx += WebpConstants.Bps; + } + + for (i = 0; i < 4; i++) + { + int t12 = tmp[12 + i]; // 15b + int t8 = tmp[8 + i]; + + int a1 = tmp[4 + i] + t8; + int a2 = tmp[4 + i] - t8; + int a0 = tmp[0 + i] + t12; // 15b + int a3 = tmp[0 + i] - t12; + + output[12 + i] = (short)(((a3 * 2217) - (a2 * 5352) + 51000) >> 16); + output[8 + i] = (short)((a0 - a1 + 7) >> 4); + output[4 + i] = (short)((((a2 * 2217) + (a3 * 5352) + 12000) >> 16) + (a3 != 0 ? 1 : 0)); + output[0 + i] = (short)((a0 + a1 + 7) >> 4); // 12b + } } } +#if SUPPORTS_RUNTIME_INTRINSICS + public static void FTransformPass1SSE2(Vector128 row01, Vector128 row23, out Vector128 out01, out Vector128 out32) + { + // *in01 = 00 01 10 11 02 03 12 13 + // *in23 = 20 21 30 31 22 23 32 33 + Vector128 shuf01_p = Sse2.ShuffleHigh(row01, MmShuffle2301); + Vector128 shuf32_p = Sse2.ShuffleHigh(row23, MmShuffle2301); + + // 00 01 10 11 03 02 13 12 + // 20 21 30 31 23 22 33 32 + Vector128 s01 = Sse2.UnpackLow(shuf01_p.AsInt64(), shuf32_p.AsInt64()); + Vector128 s32 = Sse2.UnpackHigh(shuf01_p.AsInt64(), shuf32_p.AsInt64()); + + // 00 01 10 11 20 21 30 31 + // 03 02 13 12 23 22 33 32 + Vector128 a01 = Sse2.Add(s01.AsInt16(), s32.AsInt16()); + Vector128 a32 = Sse2.Subtract(s01.AsInt16(), s32.AsInt16()); + + // [d0 + d3 | d1 + d2 | ...] = [a0 a1 | a0' a1' | ... ] + // [d0 - d3 | d1 - d2 | ...] = [a3 a2 | a3' a2' | ... ] + Vector128 tmp0 = Sse2.MultiplyAddAdjacent(a01, K88p); // [ (a0 + a1) << 3, ... ] + Vector128 tmp2 = Sse2.MultiplyAddAdjacent(a01, K88m); // [ (a0 - a1) << 3, ... ] + Vector128 tmp11 = Sse2.MultiplyAddAdjacent(a32, K5352_2217p); + Vector128 tmp31 = Sse2.MultiplyAddAdjacent(a32, K5352_2217m); + Vector128 tmp12 = Sse2.Add(tmp11, K1812); + Vector128 tmp32 = Sse2.Add(tmp31, K937); + Vector128 tmp1 = Sse2.ShiftRightArithmetic(tmp12, 9); + Vector128 tmp3 = Sse2.ShiftRightArithmetic(tmp32, 9); + Vector128 s03 = Sse2.PackSignedSaturate(tmp0, tmp2); + Vector128 s12 = Sse2.PackSignedSaturate(tmp1, tmp3); + Vector128 slo = Sse2.UnpackLow(s03, s12); // 0 1 0 1 0 1... + Vector128 shi = Sse2.UnpackHigh(s03, s12); // 2 3 2 3 2 3 + Vector128 v23 = Sse2.UnpackHigh(slo.AsInt32(), shi.AsInt32()); + out01 = Sse2.UnpackLow(slo.AsInt32(), shi.AsInt32()); + out32 = Sse2.Shuffle(v23, MmShuffle1032); + } + + public static void FTransformPass2SSE2(Vector128 v01, Vector128 v32, Span output) + { + // Same operations are done on the (0,3) and (1,2) pairs. + // a3 = v0 - v3 + // a2 = v1 - v2 + Vector128 a32 = Sse2.Subtract(v01.AsInt16(), v32.AsInt16()); + Vector128 a22 = Sse2.UnpackHigh(a32.AsInt64(), a32.AsInt64()); + + Vector128 b23 = Sse2.UnpackLow(a22.AsInt16(), a32.AsInt16()); + Vector128 c1 = Sse2.MultiplyAddAdjacent(b23, K5352_2217); + Vector128 c3 = Sse2.MultiplyAddAdjacent(b23, K2217_5352); + Vector128 d1 = Sse2.Add(c1, K12000PlusOne); + Vector128 d3 = Sse2.Add(c3, K51000); + Vector128 e1 = Sse2.ShiftRightArithmetic(d1, 16); + Vector128 e3 = Sse2.ShiftRightArithmetic(d3, 16); + + // f1 = ((b3 * 5352 + b2 * 2217 + 12000) >> 16) + // f3 = ((b3 * 2217 - b2 * 5352 + 51000) >> 16) + Vector128 f1 = Sse2.PackSignedSaturate(e1, e1); + Vector128 f3 = Sse2.PackSignedSaturate(e3, e3); + + // g1 = f1 + (a3 != 0); + // The compare will return (0xffff, 0) for (==0, !=0). To turn that into the + // desired (0, 1), we add one earlier through k12000_plus_one. + // -> g1 = f1 + 1 - (a3 == 0) + Vector128 g1 = Sse2.Add(f1, Sse2.CompareEqual(a32, Vector128.Zero)); + + // a0 = v0 + v3 + // a1 = v1 + v2 + Vector128 a01 = Sse2.Add(v01.AsInt16(), v32.AsInt16()); + Vector128 a01Plus7 = Sse2.Add(a01.AsInt16(), Seven); + Vector128 a11 = Sse2.UnpackHigh(a01.AsInt64(), a01.AsInt64()).AsInt16(); + Vector128 c0 = Sse2.Add(a01Plus7, a11); + Vector128 c2 = Sse2.Subtract(a01Plus7, a11); + + // d0 = (a0 + a1 + 7) >> 4; + // d2 = (a0 - a1 + 7) >> 4; + Vector128 d0 = Sse2.ShiftRightArithmetic(c0, 4); + Vector128 d2 = Sse2.ShiftRightArithmetic(c2, 4); + + Vector128 d0g1 = Sse2.UnpackLow(d0.AsInt64(), g1.AsInt64()); + Vector128 d2f3 = Sse2.UnpackLow(d2.AsInt64(), f3.AsInt64()); + + ref short outputRef = ref MemoryMarshal.GetReference(output); + Unsafe.As>(ref outputRef) = d0g1.AsInt16(); + Unsafe.As>(ref Unsafe.Add(ref outputRef, 8)) = d2f3.AsInt16(); + } +#endif + public static void FTransformWht(Span input, Span output, Span scratch) { Span tmp = scratch.Slice(0, 16); @@ -427,32 +650,37 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int inputIdx = 0; for (i = 0; i < 4; i++) { - int a0 = input[inputIdx + (0 * 16)] + input[inputIdx + (2 * 16)]; // 13b int a1 = input[inputIdx + (1 * 16)] + input[inputIdx + (3 * 16)]; int a2 = input[inputIdx + (1 * 16)] - input[inputIdx + (3 * 16)]; + int a0 = input[inputIdx + (0 * 16)] + input[inputIdx + (2 * 16)]; // 13b int a3 = input[inputIdx + (0 * 16)] - input[inputIdx + (2 * 16)]; - tmp[0 + (i * 4)] = a0 + a1; // 14b - tmp[1 + (i * 4)] = a3 + a2; - tmp[2 + (i * 4)] = a3 - a2; tmp[3 + (i * 4)] = a0 - a1; + tmp[2 + (i * 4)] = a3 - a2; + tmp[1 + (i * 4)] = a3 + a2; + tmp[0 + (i * 4)] = a0 + a1; // 14b inputIdx += 64; } for (i = 0; i < 4; i++) { - int a0 = tmp[0 + i] + tmp[8 + i]; // 15b - int a1 = tmp[4 + i] + tmp[12 + i]; - int a2 = tmp[4 + i] - tmp[12 + i]; - int a3 = tmp[0 + i] - tmp[8 + i]; + int t12 = tmp[12 + i]; + int t8 = tmp[8 + i]; + + int a1 = tmp[4 + i] + t12; + int a2 = tmp[4 + i] - t12; + int a0 = tmp[0 + i] + t8; // 15b + int a3 = tmp[0 + i] - t8; + int b0 = a0 + a1; // 16b int b1 = a3 + a2; int b2 = a3 - a2; int b3 = a0 - a1; - output[0 + i] = (short)(b0 >> 1); // 15b - output[4 + i] = (short)(b1 >> 1); - output[8 + i] = (short)(b2 >> 1); + output[12 + i] = (short)(b3 >> 1); + output[8 + i] = (short)(b2 >> 1); + output[4 + i] = (short)(b1 >> 1); + output[0 + i] = (short)(b0 >> 1); // 15b } } diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj index 6ad20713d..f90e40edb 100644 --- a/src/ImageSharp/ImageSharp.csproj +++ b/src/ImageSharp/ImageSharp.csproj @@ -70,11 +70,6 @@ True Block8x8F.Generated.tt - - True - True - GenericBlock8x8.Generated.tt - True True @@ -167,10 +162,6 @@ TextTemplatingFileGenerator Block8x8F.Generated.cs - - TextTemplatingFileGenerator - GenericBlock8x8.Generated.cs - TextTemplatingFileGenerator Block8x8F.Generated.cs diff --git a/tests/ImageSharp.Tests.ProfilingSandbox/Program.cs b/tests/ImageSharp.Tests.ProfilingSandbox/Program.cs index bbdb73ffb..bc0b40bad 100644 --- a/tests/ImageSharp.Tests.ProfilingSandbox/Program.cs +++ b/tests/ImageSharp.Tests.ProfilingSandbox/Program.cs @@ -72,12 +72,6 @@ namespace SixLabors.ImageSharp.Tests.ProfilingSandbox benchmarks.EncodeJpeg_SingleMidSize(); } - private static void RunJpegColorProfilingTests() - { - new JpegColorConverterTests(new ConsoleOutput()).BenchmarkYCbCr(false); - new JpegColorConverterTests(new ConsoleOutput()).BenchmarkYCbCr(true); - } - private static void RunResizeProfilingTest() { var test = new ResizeProfilingBenchmarks(new ConsoleOutput()); diff --git a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs index 3003265ca..bfc290c2e 100644 --- a/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs +++ b/tests/ImageSharp.Tests/Color/ColorTests.CastTo.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System.Numerics; using SixLabors.ImageSharp.PixelFormats; using Xunit; @@ -90,17 +91,30 @@ namespace SixLabors.ImageSharp.Tests } [Fact] - public void GenericPixel() + public void Vector4Constructor() { - AssertGenericPixel(new RgbaVector(float.Epsilon, 2 * float.Epsilon, float.MaxValue, float.MinValue)); - AssertGenericPixel(new Rgba64(1, 2, ushort.MaxValue, ushort.MaxValue - 1)); - AssertGenericPixel(new Rgb48(1, 2, ushort.MaxValue - 1)); - AssertGenericPixel(new La32(1, ushort.MaxValue - 1)); - AssertGenericPixel(new L16(ushort.MaxValue - 1)); - AssertGenericPixel(new Rgba32(1, 2, 255, 254)); + // Act: + Color color = new(Vector4.One); + + // Assert: + Assert.Equal(new RgbaVector(1, 1, 1, 1), color.ToPixel()); + Assert.Equal(new Rgba64(65535, 65535, 65535, 65535), color.ToPixel()); + Assert.Equal(new Rgba32(255, 255, 255, 255), color.ToPixel()); + Assert.Equal(new L8(255), color.ToPixel()); + } + + [Fact] + public void GenericPixelRoundTrip() + { + AssertGenericPixelRoundTrip(new RgbaVector(float.Epsilon, 2 * float.Epsilon, float.MaxValue, float.MinValue)); + AssertGenericPixelRoundTrip(new Rgba64(1, 2, ushort.MaxValue, ushort.MaxValue - 1)); + AssertGenericPixelRoundTrip(new Rgb48(1, 2, ushort.MaxValue - 1)); + AssertGenericPixelRoundTrip(new La32(1, ushort.MaxValue - 1)); + AssertGenericPixelRoundTrip(new L16(ushort.MaxValue - 1)); + AssertGenericPixelRoundTrip(new Rgba32(1, 2, 255, 254)); } - private static void AssertGenericPixel(TPixel source) + private static void AssertGenericPixelRoundTrip(TPixel source) where TPixel : unmanaged, IPixel { // Act: @@ -110,6 +124,27 @@ namespace SixLabors.ImageSharp.Tests TPixel actual = color.ToPixel(); Assert.Equal(source, actual); } + + [Fact] + public void GenericPixelDifferentPrecision() + { + AssertGenericPixelDifferentPrecision(new RgbaVector(1, 1, 1, 1), new Rgba64(65535, 65535, 65535, 65535)); + AssertGenericPixelDifferentPrecision(new RgbaVector(1, 1, 1, 1), new Rgba32(255, 255, 255, 255)); + AssertGenericPixelDifferentPrecision(new Rgba64(65535, 65535, 65535, 65535), new Rgba32(255, 255, 255, 255)); + AssertGenericPixelDifferentPrecision(new Rgba32(255, 255, 255, 255), new L8(255)); + } + + private static void AssertGenericPixelDifferentPrecision(TPixel source, TPixel2 expected) + where TPixel : unmanaged, IPixel + where TPixel2 : unmanaged, IPixel + { + // Act: + var color = Color.FromPixel(source); + + // Assert: + TPixel2 actual = color.ToPixel(); + Assert.Equal(expected, actual); + } } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 8f5f10f19..ae7e81254 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -114,56 +114,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg // PrintLinearData((Span)mirror); } - [Fact] - public unsafe void Load_Store_FloatArray_Ptr() - { - float[] data = new float[Block8x8F.Size]; - float[] mirror = new float[Block8x8F.Size]; - - for (int i = 0; i < Block8x8F.Size; i++) - { - data[i] = i; - } - - this.Measure( - Times, - () => - { - var b = default(Block8x8F); - Block8x8F.LoadFrom(&b, data); - Block8x8F.ScaledCopyTo(&b, mirror); - }); - - Assert.Equal(data, mirror); - - // PrintLinearData((Span)mirror); - } - - [Fact] - public void Load_Store_IntArray() - { - int[] data = new int[Block8x8F.Size]; - int[] mirror = new int[Block8x8F.Size]; - - for (int i = 0; i < Block8x8F.Size; i++) - { - data[i] = i; - } - - this.Measure( - Times, - () => - { - var v = default(Block8x8F); - v.LoadFrom(data); - v.ScaledCopyTo(mirror); - }); - - Assert.Equal(data, mirror); - - // PrintLinearData((Span)mirror); - } - [Fact] public void TransposeInplace() { diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs index b13a196cb..094622070 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs @@ -70,20 +70,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Assert.Equal(data, result); } - [Fact] - public void Equality_WhenTrue() - { - short[] data = Create8x8ShortData(); - var block1 = Block8x8.Load(data); - var block2 = Block8x8.Load(data); - - block1[0] = 42; - block2[0] = 42; - - Assert.Equal(block1, block2); - Assert.Equal(block1.GetHashCode(), block2.GetHashCode()); - } - [Fact] public void Equality_WhenFalse() { diff --git a/tests/ImageSharp.Tests/Formats/Jpg/GenericBlock8x8Tests.cs b/tests/ImageSharp.Tests/Formats/Jpg/GenericBlock8x8Tests.cs deleted file mode 100644 index d5ee2a2b8..000000000 --- a/tests/ImageSharp.Tests/Formats/Jpg/GenericBlock8x8Tests.cs +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using SixLabors.ImageSharp.Formats.Jpeg.Components; -using SixLabors.ImageSharp.Memory; -using SixLabors.ImageSharp.PixelFormats; - -using Xunit; - -namespace SixLabors.ImageSharp.Tests.Formats.Jpg -{ - [Trait("Format", "Jpg")] - public class GenericBlock8x8Tests - { - public static Image CreateTestImage() - where TPixel : unmanaged, IPixel - { - var image = new Image(10, 10); - Buffer2D pixels = image.GetRootFramePixelBuffer(); - for (int i = 0; i < 10; i++) - { - for (int j = 0; j < 10; j++) - { - var rgba = new Rgba32((byte)(i + 1), (byte)(j + 1), 200, 255); - var color = default(TPixel); - color.FromRgba32(rgba); - - pixels[i, j] = color; - } - } - - return image; - } - - [Theory] - [WithMemberFactory(nameof(CreateTestImage), PixelTypes.Rgb24 | PixelTypes.Rgba32 /* | PixelTypes.Rgba32 | PixelTypes.Argb32*/)] - public void LoadAndStretchCorners_FromOrigo(TestImageProvider provider) - where TPixel : unmanaged, IPixel - { - using (Image s = provider.GetImage()) - { - var d = default(GenericBlock8x8); - RowOctet rowOctet = default; - rowOctet.Update(s.GetRootFramePixelBuffer(), 0); - d.LoadAndStretchEdges(s.Frames.RootFrame.PixelBuffer, 0, 0, ref rowOctet); - - TPixel a = s.Frames.RootFrame[0, 0]; - TPixel b = d[0, 0]; - - Assert.Equal(s[0, 0], d[0, 0]); - Assert.Equal(s[1, 0], d[1, 0]); - Assert.Equal(s[7, 0], d[7, 0]); - Assert.Equal(s[0, 1], d[0, 1]); - Assert.Equal(s[1, 1], d[1, 1]); - Assert.Equal(s[7, 0], d[7, 0]); - Assert.Equal(s[0, 7], d[0, 7]); - Assert.Equal(s[7, 7], d[7, 7]); - } - } - - [Theory] - [WithMemberFactory(nameof(CreateTestImage), PixelTypes.Rgb24 | PixelTypes.Rgba32)] - public void LoadAndStretchCorners_WithOffset(TestImageProvider provider) - where TPixel : unmanaged, IPixel - { - using (Image s = provider.GetImage()) - { - var d = default(GenericBlock8x8); - RowOctet rowOctet = default; - rowOctet.Update(s.GetRootFramePixelBuffer(), 7); - - d.LoadAndStretchEdges(s.Frames.RootFrame.PixelBuffer, 6, 7, ref rowOctet); - - Assert.Equal(s[6, 7], d[0, 0]); - Assert.Equal(s[6, 8], d[0, 1]); - Assert.Equal(s[7, 8], d[1, 1]); - - Assert.Equal(s[6, 9], d[0, 2]); - Assert.Equal(s[6, 9], d[0, 3]); - Assert.Equal(s[6, 9], d[0, 7]); - - Assert.Equal(s[7, 9], d[1, 2]); - Assert.Equal(s[7, 9], d[1, 3]); - Assert.Equal(s[7, 9], d[1, 7]); - - Assert.Equal(s[9, 9], d[3, 2]); - Assert.Equal(s[9, 9], d[3, 3]); - Assert.Equal(s[9, 9], d[3, 7]); - - Assert.Equal(s[9, 7], d[3, 0]); - Assert.Equal(s[9, 7], d[4, 0]); - Assert.Equal(s[9, 7], d[7, 0]); - - Assert.Equal(s[9, 9], d[3, 2]); - Assert.Equal(s[9, 9], d[4, 2]); - Assert.Equal(s[9, 9], d[7, 2]); - - Assert.Equal(s[9, 9], d[4, 3]); - Assert.Equal(s[9, 9], d[7, 7]); - } - } - - [Fact] - public void Indexer() - { - var block = default(GenericBlock8x8); - Span span = block.AsSpanUnsafe(); - Assert.Equal(64, span.Length); - - for (int i = 0; i < 64; i++) - { - span[i] = new Rgb24((byte)i, (byte)(2 * i), (byte)(3 * i)); - } - - var expected00 = new Rgb24(0, 0, 0); - var expected07 = new Rgb24(7, 14, 21); - var expected11 = new Rgb24(9, 18, 27); - var expected77 = new Rgb24(63, 126, 189); - var expected67 = new Rgb24(62, 124, 186); - - Assert.Equal(expected00, block[0, 0]); - Assert.Equal(expected07, block[7, 0]); - Assert.Equal(expected11, block[1, 1]); - Assert.Equal(expected67, block[6, 7]); - Assert.Equal(expected77, block[7, 7]); - } - } -} diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 4e0970e3b..d6dc57e83 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -340,32 +340,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg seed); } - // Benchmark, for local execution only - // [Theory] - // [InlineData(false)] - // [InlineData(true)] - public void BenchmarkYCbCr(bool simd) - { - int count = 2053; - int times = 50000; - - JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); - var result = new Vector4[count]; - - JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector4(8) : new JpegColorConverter.FromYCbCrBasic(8); - - // Warm up: - converter.ConvertToRgbInplace(values); - - using (new MeasureGuard(this.Output, $"{converter.GetType().Name} x {times}")) - { - for (int i = 0; i < times; i++) - { - converter.ConvertToRgbInplace(values); - } - } - } - private static JpegColorConverter.ComponentValues CreateRandomValues( int componentCount, int inputBufferLength, diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs index 2cbc29027..dcdfc3e42 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs @@ -93,6 +93,14 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Assert.IsType>(image); } + [Fact] + public async Task DecodeAsync_NonGeneric_CreatesRgb24Image() + { + string file = Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, TestImages.Jpeg.Baseline.Jpeg420Small); + using Image image = await Image.LoadAsync(file); + Assert.IsType>(image); + } + [Theory] [WithFile(TestImages.Jpeg.Baseline.Calliphora, CommonNonDefaultPixelTypes)] public void JpegDecoder_IsNotBoundToSinglePixelType(TestImageProvider provider) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.FastFloatingPointDCT.cs b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.FastFloatingPointDCT.cs index 8920d42d4..9eb3c0103 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.FastFloatingPointDCT.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.FastFloatingPointDCT.cs @@ -94,7 +94,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformFDCT(ref source); Block8x8F actual = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformFDCT_UpscaleBy8(ref source); - actual /= 8; + actual.MultiplyInPlace(0.125f); this.CompareBlocks(expected, actual, 1f); } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.StandardIntegerDCT.cs b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.StandardIntegerDCT.cs index 6e25334f2..02f8ba388 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.StandardIntegerDCT.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.StandardIntegerDCT.cs @@ -51,11 +51,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformFDCT(ref source); - source += 128; + source.AddInPlace(128f); Block8x8 temp = source.RoundAsInt16Block(); Block8x8 actual8 = ReferenceImplementations.StandardIntegerDCT.Subtract128_TransformFDCT_Upscale8(ref temp); Block8x8F actual = actual8.AsFloatBlock(); - actual /= 8; + actual.MultiplyInPlace(0.125f); this.CompareBlocks(expected, actual, 1f); } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs index f0f92d763..0071c623c 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs @@ -3,6 +3,7 @@ using System.IO; using System.Linq; +using System.Threading; using SixLabors.ImageSharp.Formats.Jpeg; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder; using SixLabors.ImageSharp.IO; @@ -43,7 +44,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg using var bufferedStream = new BufferedReadStream(Configuration.Default, ms); // Decoding - using var converter = new SpectralConverter(Configuration.Default, cancellationToken: default); + using var converter = new SpectralConverter(Configuration.Default); var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder()); var scanDecoder = new HuffmanScanDecoder(bufferedStream, converter, cancellationToken: default); decoder.ParseStream(bufferedStream, scanDecoder, cancellationToken: default); @@ -53,7 +54,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg provider.Utility.TestName = JpegDecoderTests.DecodeBaselineJpegOutputName; // Comparison - using (Image image = new Image(Configuration.Default, converter.GetPixelBuffer(), new ImageMetadata())) + using (Image image = new Image(Configuration.Default, converter.GetPixelBuffer(CancellationToken.None), new ImageMetadata())) using (Image referenceImage = provider.GetReferenceOutputImage(appendPixelTypeToFileName: false)) { ImageSimilarityReport report = ImageComparer.Exact.CompareImagesOrFrames(referenceImage, image); diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs index 907b18300..aaffac443 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -11,8 +11,74 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp [Trait("Format", "Webp")] public class LossyUtilsTests { + private static void RunTransformTwoTest() + { + // arrange + short[] src = { 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 23, 0, 0, -23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + byte[] dst = + { + 103, 103, 103, 103, 103, 103, 103, 103, 0, 0, 0, 0, 169, 169, 169, 169, 171, 171, 171, 171, + 171, 171, 171, 171, 0, 0, 0, 0, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, + 0, 0, 0, 0, 169, 169, 169, 169, 171, 171, 171, 171, 171, 171, 171, 171, 0, 0, 0, 0, + 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 0, 0, 0, 0, 169, 169, 169, 169, + 171, 171, 171, 171, 171, 171, 171, 171, 0, 0, 0, 0, 103, 103, 103, 103, 103, 103, 103, 103, + 103, 103, 103, 103, 0, 0, 0, 0, 169, 169, 169, 169, 171, 171, 171, 171, 171, 171, 171, 171, + 0, 0, 0, 0, 0, 0, 0, 0 + }; + byte[] expected = + { + 105, 105, 105, 105, 105, 103, 100, 98, 0, 0, 0, 0, 169, 169, 169, 169, 171, 171, 171, 171, 171, 171, + 171, 171, 0, 0, 0, 0, 103, 103, 103, 103, 105, 105, 105, 105, 108, 105, 102, 100, 0, 0, 0, 0, 169, + 169, 169, 169, 171, 171, 171, 171, 171, 171, 171, 171, 0, 0, 0, 0, 103, 103, 103, 103, 105, 105, + 105, 105, 111, 109, 106, 103, 0, 0, 0, 0, 169, 169, 169, 169, 171, 171, 171, 171, 171, 171, 171, + 171, 0, 0, 0, 0, 103, 103, 103, 103, 105, 105, 105, 105, 113, 111, 108, 106, 0, 0, 0, 0, 169, 169, + 169, 169, 171, 171, 171, 171, 171, 171, 171, 171, 0, 0, 0, 0, 0, 0, 0, 0 + }; + int[] scratch = new int[16]; + + // act + LossyUtils.TransformTwo(src, dst, scratch); + + // assert + Assert.True(expected.SequenceEqual(dst)); + } + + private static void RunTransformOneTest() + { + // arrange + short[] src = { -176, 0, 0, 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + byte[] dst = + { + 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 129, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 129, 128, 128, 128, 128, 128, 128, 128, 128, + 0, 0, 0, 0, 0, 0, 0, 129, 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, + 0, 0, 0, 129, 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 129, + 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 129, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 129, 128, 128, 128, 128, 128, 128, 128, 128, + 0, 0, 0, 0, 0, 0, 0, 129 + }; + byte[] expected = + { + 111, 111, 111, 111, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 129, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 129, 108, 108, 108, 108, 128, 128, 128, 128, + 0, 0, 0, 0, 0, 0, 0, 129, 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, + 0, 0, 0, 129, 104, 104, 104, 104, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 129, + 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 129, 101, 101, 101, 101, + 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 129, 128, 128, 128, 128, 128, 128, 128, 128, + 0, 0, 0, 0, 0, 0, 0, 129 + }; + int[] scratch = new int[16]; + + // act + LossyUtils.TransformOne(src, dst, scratch); + + // assert + Assert.True(expected.SequenceEqual(dst)); + } + private static void RunVp8Sse4X4Test() { + // arrange byte[] a = { 27, 27, 28, 29, 29, 28, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, @@ -35,8 +101,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp int expected = 27; + // act int actual = LossyUtils.Vp8_Sse4X4(a, b); + // assert Assert.Equal(expected, actual); } @@ -65,6 +133,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp private static void RunHadamardTransformTest() { + // arrange byte[] a = { 27, 27, 28, 29, 29, 28, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, @@ -86,10 +155,19 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp ushort[] w = { 38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2 }; int expected = 2; + // act int actual = LossyUtils.Vp8Disto4X4(a, b, w, new int[16]); + + // assert Assert.Equal(expected, actual); } + [Fact] + public void RunTransformTwo_Works() => RunTransformTwoTest(); + + [Fact] + public void RunTransformOne_Works() => RunTransformOneTest(); + [Fact] public void Vp8Sse4X4_Works() => RunVp8Sse4X4Test(); @@ -100,11 +178,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp public void HadamardTransform_Works() => RunHadamardTransformTest(); #if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void TransformTwo_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformTwoTest, HwIntrinsics.AllowAll); + + [Fact] + public void TransformTwo_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformTwoTest, HwIntrinsics.DisableHWIntrinsic); + + [Fact] + public void TransformOne_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformOneTest, HwIntrinsics.AllowAll); + + [Fact] + public void TransformOne_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformOneTest, HwIntrinsics.DisableHWIntrinsic); + [Fact] public void Vp8Sse4X4_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.AllowAll); [Fact] - public void Vp8Sse4X4_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.DisableHWIntrinsic); + public void Vp8Sse4X4_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.DisableSSE2); + + [Fact] + public void Vp8Sse4X4_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.DisableAVX2); [Fact] public void Mean16x4_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunMean16x4Test, HwIntrinsics.AllowAll); diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs index 80b5f0a53..ef60a7b20 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -47,7 +47,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp public void QuantizeBlock_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.AllowAll); [Fact] - public void QuantizeBlock_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableHWIntrinsic); + public void QuantizeBlock_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2); + + [Fact] + public void QuantizeBlock_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableAVX2); #endif } } diff --git a/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs b/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs index 6bcb4f21f..2a43cb38b 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs @@ -11,6 +11,57 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp [Trait("Format", "Webp")] public class Vp8EncodingTests { + private static void RunFTransform2Test() + { + // arrange + byte[] src = { 154, 154, 151, 151, 149, 148, 151, 157, 163, 163, 154, 132, 102, 98, 104, 108, 107, 104, 104, 103, 101, 106, 123, 119, 170, 171, 172, 171, 168, 175, 171, 173, 151, 151, 149, 150, 147, 147, 146, 159, 164, 165, 154, 129, 92, 90, 101, 105, 104, 103, 104, 101, 100, 105, 123, 117, 172, 172, 172, 168, 170, 177, 170, 175, 151, 149, 150, 150, 147, 147, 156, 161, 161, 161, 151, 126, 93, 90, 102, 107, 104, 103, 104, 101, 104, 104, 122, 117, 172, 172, 170, 168, 170, 177, 172, 175, 150, 149, 152, 151, 148, 151, 160, 159, 157, 157, 148, 133, 96, 90, 103, 107, 104, 104, 101, 100, 102, 102, 121, 117, 170, 170, 169, 171, 171, 179, 173, 175 }; + byte[] reference = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129 }; + short[] actualOutput1 = new short[16]; + short[] actualOutput2 = new short[16]; + short[] expectedOutput1 = { 182, 4, 1, 1, 6, 7, -1, -4, 5, 0, -2, 1, 2, 1, 1, 1 }; + short[] expectedOutput2 = { 192, -34, 10, 1, -11, 8, 10, -7, 6, 3, -8, 4, 5, -3, -2, 6 }; + + // act + Vp8Encoding.FTransform2(src, reference, actualOutput1, actualOutput2, new int[16]); + + // assert + Assert.True(expectedOutput1.SequenceEqual(actualOutput1)); + Assert.True(expectedOutput2.SequenceEqual(actualOutput2)); + } + + private static void RunFTransformTest() + { + // arrange + byte[] src = + { + 154, 154, 151, 151, 149, 148, 151, 157, 163, 163, 154, 132, 102, 98, 104, 108, 107, 104, 104, 103, + 101, 106, 123, 119, 170, 171, 172, 171, 168, 175, 171, 173, 151, 151, 149, 150, 147, 147, 146, 159, + 164, 165, 154, 129, 92, 90, 101, 105, 104, 103, 104, 101, 100, 105, 123, 117, 172, 172, 172, 168, + 170, 177, 170, 175, 151, 149, 150, 150, 147, 147, 156, 161, 161, 161, 151, 126, 93, 90, 102, 107, + 104, 103, 104, 101, 104, 104, 122, 117, 172, 172, 170, 168, 170, 177, 172, 175, 150, 149, 152, 151, + 148, 151, 160, 159, 157, 157, 148, 133, 96, 90, 103, 107, 104, 104, 101, 100, 102, 102, 121, 117, + 170, 170, 169, 171, 171, 179, 173, 175 + }; + byte[] reference = + { + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, + 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, + 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129, 129, + 129, 129, 129, 129, 129, 129, 129, 129 + }; + short[] actualOutput = new short[16]; + short[] expectedOutput = { 182, 4, 1, 1, 6, 7, -1, -4, 5, 0, -2, 1, 2, 1, 1, 1 }; + + // act + Vp8Encoding.FTransform(src, reference, actualOutput, new int[16]); + + // assert + Assert.True(expectedOutput.SequenceEqual(actualOutput)); + } + private static void RunOneInverseTransformTest() { // arrange @@ -69,12 +120,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp int[] scratch = new int[16]; // act - Vp8Encoding.ITransform(reference, input, dst, scratch); + Vp8Encoding.ITransformTwo(reference, input, dst, scratch); // assert Assert.True(dst.SequenceEqual(expected)); } + [Fact] + public void FTransform2_Works() => RunFTransform2Test(); + + [Fact] + public void FTransform_Works() => RunFTransformTest(); + [Fact] public void OneInverseTransform_Works() => RunOneInverseTransformTest(); @@ -82,6 +139,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp public void TwoInverseTransform_Works() => RunTwoInverseTransformTest(); #if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void FTransform2_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunFTransform2Test, HwIntrinsics.AllowAll); + + [Fact] + public void FTransform2_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunFTransform2Test, HwIntrinsics.DisableHWIntrinsic); + + [Fact] + public void FTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunFTransformTest, HwIntrinsics.AllowAll); + + [Fact] + public void FTransform_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunFTransformTest, HwIntrinsics.DisableHWIntrinsic); + [Fact] public void OneInverseTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunOneInverseTransformTest, HwIntrinsics.AllowAll);