|
|
|
@ -28,16 +28,18 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters |
|
|
|
int simdCount = result.Length - remainder; |
|
|
|
if (simdCount > 0) |
|
|
|
{ |
|
|
|
ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount)); |
|
|
|
ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), |
|
|
|
this.MaximumValue, this.HalfValue); |
|
|
|
} |
|
|
|
|
|
|
|
FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder)); |
|
|
|
FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), |
|
|
|
this.MaximumValue, this.HalfValue);; |
|
|
|
} |
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// SIMD convert using buffers of sizes divisible by 8.
|
|
|
|
/// </summary>
|
|
|
|
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result) |
|
|
|
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue) |
|
|
|
{ |
|
|
|
// This implementation is actually AVX specific.
|
|
|
|
// An AVX register is capable of storing 8 float-s.
|
|
|
|
@ -57,7 +59,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters |
|
|
|
ref Vector4Octet resultBase = |
|
|
|
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result)); |
|
|
|
|
|
|
|
var chromaOffset = new Vector<float>(-128f); |
|
|
|
var chromaOffset = new Vector<float>(-halfValue); |
|
|
|
|
|
|
|
// Walking 8 elements at one step:
|
|
|
|
int n = result.Length / 8; |
|
|
|
@ -70,7 +72,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters |
|
|
|
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg); |
|
|
|
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb); |
|
|
|
|
|
|
|
var scale = new Vector<float>(1 / 255f); |
|
|
|
var scale = new Vector<float>(1 / maxValue); |
|
|
|
|
|
|
|
for (int i = 0; i < n; i++) |
|
|
|
{ |
|
|
|
|