mirror of https://github.com/SixLabors/ImageSharp
29 changed files with 1042 additions and 855 deletions
@ -0,0 +1,22 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics.X86; |
|||
#endif
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Shared base for the vectorized color converters whose availability is tied to AVX2 support.
        /// </summary>
        internal abstract class Avx2JpegColorConverter : VectorizedJpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="Avx2JpegColorConverter"/> class.
            /// </summary>
            /// <param name="colorSpace">The color space, forwarded to the base constructor.</param>
            /// <param name="precision">The precision, forwarded to the base constructor.</param>
            protected Avx2JpegColorConverter(JpegColorSpace colorSpace, int precision)

                // NOTE(review): the literal 8 is presumably the number of floats handled per vector step - confirm against VectorizedJpegColorConverter.
                : base(colorSpace, precision, 8)
            {
            }

            /// <summary>
            /// Gets a value indicating whether this converter can be used; reports <see cref="SimdUtils.HasAvx2"/>.
            /// </summary>
            protected sealed override bool IsAvailable
            {
                get
                {
                    return SimdUtils.HasAvx2;
                }
            }
        }
    }
}
|||
@ -0,0 +1,18 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Base for converters that are always reported as available.
        /// </summary>
        internal abstract class BasicJpegColorConverter : JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="BasicJpegColorConverter"/> class.
            /// </summary>
            /// <param name="colorSpace">The color space, forwarded to the base constructor.</param>
            /// <param name="precision">The precision, forwarded to the base constructor.</param>
            protected BasicJpegColorConverter(JpegColorSpace colorSpace, int precision)
                : base(colorSpace, precision)
            {
            }

            /// <summary>
            /// Gets a value indicating whether this converter can be used; always <see langword="true"/>.
            /// </summary>
            protected override bool IsAvailable
            {
                get
                {
                    return true;
                }
            }
        }
    }
}
|||
@ -0,0 +1,81 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using static SixLabors.ImageSharp.SimdUtils; |
|||
#endif
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// CMYK converter whose vectorized path is written with AVX/AVX2 intrinsics.
        /// </summary>
        internal sealed class FromCmykAvx2 : Avx2JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromCmykAvx2"/> class.
            /// </summary>
            /// <param name="precision">The precision, forwarded to the base constructor.</param>
            public FromCmykAvx2(int precision)
                : base(JpegColorSpace.Cmyk, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of 8 values. For every element the output is
            /// <c>(c * k', m * k', y * k', 1)</c>, each color additionally multiplied by <c>1 / MaximumValue</c>,
            /// where <c>k' = k / MaximumValue</c>. Elements beyond the last full group of 8 are not touched here.
            /// The body is compiled only when SUPPORTS_RUNTIME_INTRINSICS is defined.
            /// </summary>
            /// <param name="values">Component rows: Component0..Component3 are read as c, m, y, k.</param>
            /// <param name="result">Destination span, written as four 256-bit vectors per group.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> cyanRef =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> magentaRef =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> yellowRef =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector256<float> blackRef =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));

                ref Vector256<float> targetRef =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Constants of the color math: the normalization factor and the constant W lane.
                Vector256<float> invMax = Vector256.Create(1 / this.MaximumValue);
                Vector256<float> alpha = Vector256.Create(1F);

                // Lane permutation applied to every loaded vector before interleaving.
                // NOTE(review): presumably chosen so the unpack/shuffle sequence below emits pixels in source order - verify against the mask definition.
                ref byte maskStart = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> laneOrder = Unsafe.As<byte, Vector256<int>>(ref maskStart);

                int groups = result.Length / 8;
                for (int g = 0; g < groups; g++)
                {
                    Vector256<float> k = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref blackRef, g), laneOrder), invMax);

                    Vector256<float> c = Avx2.PermuteVar8x32(Unsafe.Add(ref cyanRef, g), laneOrder);
                    Vector256<float> m = Avx2.PermuteVar8x32(Unsafe.Add(ref magentaRef, g), laneOrder);
                    Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref yellowRef, g), laneOrder);

                    // Multiply by the scaled key first, then by the scale (same evaluation order for all three).
                    c = Avx.Multiply(Avx.Multiply(c, k), invMax);
                    m = Avx.Multiply(Avx.Multiply(m, k), invMax);
                    y = Avx.Multiply(Avx.Multiply(y, k), invMax);

                    // Interleave (c, m) and (y, 1) pairs, then combine them into four-component vectors.
                    Vector256<float> lowerCm = Avx.UnpackLow(c, m);
                    Vector256<float> upperCm = Avx.UnpackHigh(c, m);
                    Vector256<float> lowerY1 = Avx.UnpackLow(y, alpha);
                    Vector256<float> upperY1 = Avx.UnpackHigh(y, alpha);

                    ref Vector256<float> target = ref Unsafe.Add(ref targetRef, g * 4);

                    target = Avx.Shuffle(lowerCm, lowerY1, 0b01_00_01_00);
                    Unsafe.Add(ref target, 1) = Avx.Shuffle(lowerCm, lowerY1, 0b11_10_11_10);
                    Unsafe.Add(ref target, 2) = Avx.Shuffle(upperCm, upperY1, 0b01_00_01_00);
                    Unsafe.Add(ref target, 3) = Avx.Shuffle(upperCm, upperY1, 0b11_10_11_10);
                }
#endif
            }

            /// <summary>
            /// Non-vectorized conversion; delegates to <see cref="FromCmykBasic"/> with this converter's maximum value.
            /// </summary>
            /// <param name="values">The component rows to convert.</param>
            /// <param name="result">The destination span.</param>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
            }
        }
    }
}
|||
@ -1,145 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using static SixLabors.ImageSharp.SimdUtils; |
|||
#else
|
|||
using SixLabors.ImageSharp.Tuples; |
|||
#endif
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// CMYK converter that processes 8 values per step, using AVX/AVX2 intrinsics when
        /// SUPPORTS_RUNTIME_INTRINSICS is defined and <see cref="Vector{T}"/> otherwise.
        /// </summary>
        internal sealed class FromCmykVector8 : JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromCmykVector8"/> class.
            /// </summary>
            /// <param name="precision">The precision, forwarded to the base constructor.</param>
            public FromCmykVector8(int precision)
                : base(JpegColorSpace.Cmyk, precision)
            {
            }

            /// <summary>
            /// Gets a value indicating whether this converter can be used on the current machine.
            /// </summary>
            public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;

            /// <summary>
            /// Converts the largest prefix whose length is a multiple of 8 with the vectorized
            /// <see cref="ConvertCore"/> and the remaining tail with <see cref="FromCmykBasic"/>.
            /// </summary>
            /// <param name="values">The component rows to convert.</param>
            /// <param name="result">The destination span.</param>
            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
                int remainder = result.Length % 8;
                int simdCount = result.Length - remainder;
                if (simdCount > 0)
                {
                    ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue);
                }

                FromCmykBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue);
            }

            /// <summary>
            /// Vectorized conversion of <c>result.Length / 8</c> groups of 8 values.
            /// </summary>
            /// <param name="values">Component rows: Component0..Component3 are read as c, m, y, k.</param>
            /// <param name="result">The destination span.</param>
            /// <param name="maxValue">The maximum component value; inputs are multiplied by its reciprocal.</param>
            /// <exception cref="InvalidOperationException">Thrown when <see cref="IsAvailable"/> is false.</exception>
            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
            {
                // This implementation is actually AVX specific.
                // An AVX register is capable of storing 8 float-s.
                if (!IsAvailable)
                {
                    // Fixed: the message previously named FromGrayscaleVector8 and spoke of "256 byte" registers.
                    throw new InvalidOperationException(
                        "JpegColorConverter.FromCmykVector8 can be used only on architecture having 256 bit floating point SIMD registers!");
                }

#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> cBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> mBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> yBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector256<float> kBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));

                ref Vector256<float> resultBase =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Used for the color conversion
                var scale = Vector256.Create(1 / maxValue);
                var one = Vector256.Create(1F);

                // Used for packing
                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

                int n = result.Length / 8;
                for (int i = 0; i < n; i++)
                {
                    Vector256<float> k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol);
                    Vector256<float> c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol);
                    Vector256<float> m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol);
                    Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol);

                    k = Avx.Multiply(k, scale);

                    c = Avx.Multiply(Avx.Multiply(c, k), scale);
                    m = Avx.Multiply(Avx.Multiply(m, k), scale);
                    y = Avx.Multiply(Avx.Multiply(y, k), scale);

                    // Interleave (c, m) and (y, 1) pairs, then combine them into four-component vectors.
                    Vector256<float> cmLo = Avx.UnpackLow(c, m);
                    Vector256<float> yoLo = Avx.UnpackLow(y, one);
                    Vector256<float> cmHi = Avx.UnpackHigh(c, m);
                    Vector256<float> yoHi = Avx.UnpackHigh(y, one);

                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

                    destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00);
                    Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10);
                    Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00);
                    Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10);
                }
#else
                ref Vector<float> cBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> mBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector<float> yBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector<float> kBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));

                ref Vector4Octet resultBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Scratch pairs, written through the Vector<float> views below and then handed to Pack.
                Vector4Pair cc = default;
                Vector4Pair mm = default;
                Vector4Pair yy = default;
                ref Vector<float> ccRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref cc);
                ref Vector<float> mmRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref mm);
                ref Vector<float> yyRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref yy);

                var scale = new Vector<float>(1 / maxValue);

                // Walking 8 elements at one step:
                int n = result.Length / 8;
                for (int i = 0; i < n; i++)
                {
                    Vector<float> c = Unsafe.Add(ref cBase, i);
                    Vector<float> m = Unsafe.Add(ref mBase, i);
                    Vector<float> y = Unsafe.Add(ref yBase, i);
                    Vector<float> k = Unsafe.Add(ref kBase, i) * scale;

                    c = (c * k) * scale;
                    m = (m * k) * scale;
                    y = (y * k) * scale;

                    ccRefAsVector = c;
                    mmRefAsVector = m;
                    yyRefAsVector = y;

                    // Collect the (c0..c7) (m0..m7) (y0..y7) vector values into the destination octet.
                    // NOTE(review): expected layout is presumably (c0,m0,y0,1), (c1,m1,y1,1) ... - confirm against Vector4Octet.Pack.
                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
                    destination.Pack(ref cc, ref mm, ref yy);
                }
#endif
            }
        }
    }
}
|||
@ -0,0 +1,71 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.Tuples; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// CMYK converter whose vectorized path is written against <see cref="Vector{T}"/>.
        /// </summary>
        internal sealed class FromCmykVector8 : Vector8JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromCmykVector8"/> class.
            /// </summary>
            /// <param name="precision">The precision, forwarded to the base constructor.</param>
            public FromCmykVector8(int precision)
                : base(JpegColorSpace.Cmyk, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of 8 values. Each color component is multiplied by
            /// the scaled key (<c>k / MaximumValue</c>) and then by <c>1 / MaximumValue</c>.
            /// Elements beyond the last full group of 8 are not touched here.
            /// </summary>
            /// <param name="values">Component rows: Component0..Component3 are read as c, m, y, k.</param>
            /// <param name="result">Destination span, written one <see cref="Vector4Octet"/> per group.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
                ref Vector<float> cyanRef =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> magentaRef =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector<float> yellowRef =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector<float> blackRef =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));

                ref Vector4Octet targetRef =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Scratch pairs: filled through a Vector<float> view, then handed to Pack.
                Vector4Pair cyanPair = default;
                Vector4Pair magentaPair = default;
                Vector4Pair yellowPair = default;
                ref Vector<float> cyanView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref cyanPair);
                ref Vector<float> magentaView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref magentaPair);
                ref Vector<float> yellowView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref yellowPair);

                Vector<float> invMax = new Vector<float>(1 / this.MaximumValue);

                // One group of 8 elements per iteration.
                int groups = result.Length / 8;
                for (int g = 0; g < groups; g++)
                {
                    Vector<float> key = Unsafe.Add(ref blackRef, g) * invMax;

                    cyanView = (Unsafe.Add(ref cyanRef, g) * key) * invMax;
                    magentaView = (Unsafe.Add(ref magentaRef, g) * key) * invMax;
                    yellowView = (Unsafe.Add(ref yellowRef, g) * key) * invMax;

                    // Pack the three 8-wide channel vectors into the destination octet.
                    // NOTE(review): expected layout is presumably (c0,m0,y0,1), (c1,m1,y1,1) ... - confirm against Vector4Octet.Pack.
                    ref Vector4Octet target = ref Unsafe.Add(ref targetRef, g);
                    target.Pack(ref cyanPair, ref magentaPair, ref yellowPair);
                }
            }

            /// <summary>
            /// Non-vectorized conversion; delegates to <see cref="FromCmykBasic"/> with this converter's maximum value.
            /// </summary>
            /// <param name="values">The component rows to convert.</param>
            /// <param name="result">The destination span.</param>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
            }
        }
    }
}
|||
@ -0,0 +1,63 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using static SixLabors.ImageSharp.SimdUtils; |
|||
#endif
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Grayscale converter whose vectorized path is written with AVX/AVX2 intrinsics.
        /// </summary>
        internal sealed class FromGrayscaleAvx2 : Avx2JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromGrayscaleAvx2"/> class.
            /// </summary>
            /// <param name="precision">The precision, forwarded to the base constructor.</param>
            public FromGrayscaleAvx2(int precision)
                : base(JpegColorSpace.Grayscale, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of 8 values: each input is multiplied by
            /// <c>1 / MaximumValue</c>, replicated into three lanes, and the fourth lane is set to 1.
            /// Elements beyond the last full group of 8 are not touched here.
            /// The body is compiled only when SUPPORTS_RUNTIME_INTRINSICS is defined.
            /// </summary>
            /// <param name="values">Component rows; only Component0 is read.</param>
            /// <param name="result">Destination span, written as four 256-bit vectors per group.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> grayRef =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));

                ref Vector256<float> targetRef =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Constants of the color math: the normalization factor and the constant W lane.
                Vector256<float> invMax = Vector256.Create(1 / this.MaximumValue);
                Vector256<float> alpha = Vector256.Create(1F);

                // Lane permutation applied to the loaded vector before it is spread over the outputs.
                ref byte maskStart = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> laneOrder = Unsafe.As<byte, Vector256<int>>(ref maskStart);

                int groups = result.Length / 8;
                for (int i = 0; i < groups; i++)
                {
                    Vector256<float> gray = Avx.Multiply(
                        Avx2.PermuteVar8x32(Unsafe.Add(ref grayRef, i), laneOrder),
                        invMax);

                    // Replicate one element across each 128-bit half...
                    Vector256<float> spread0 = Avx.Permute(gray, 0b00_00_00_00);
                    Vector256<float> spread1 = Avx.Permute(gray, 0b01_01_01_01);
                    Vector256<float> spread2 = Avx.Permute(gray, 0b10_10_10_10);
                    Vector256<float> spread3 = Avx.Permute(gray, 0b11_11_11_11);

                    // ...then overwrite elements 3 and 7 (the W lanes) with 1.
                    ref Vector256<float> target = ref Unsafe.Add(ref targetRef, i * 4);

                    target = Avx.Blend(spread0, alpha, 0b1000_1000);
                    Unsafe.Add(ref target, 1) = Avx.Blend(spread1, alpha, 0b1000_1000);
                    Unsafe.Add(ref target, 2) = Avx.Blend(spread2, alpha, 0b1000_1000);
                    Unsafe.Add(ref target, 3) = Avx.Blend(spread3, alpha, 0b1000_1000);
                }
#endif
            }

            /// <summary>
            /// Non-vectorized conversion; delegates to <see cref="FromGrayscaleBasic"/> with this converter's maximum value.
            /// </summary>
            /// <param name="values">The component rows to convert.</param>
            /// <param name="result">The destination span.</param>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue);
            }
        }
    }
}
|||
@ -1,109 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using static SixLabors.ImageSharp.SimdUtils; |
|||
#else
|
|||
using SixLabors.ImageSharp.Tuples; |
|||
#endif
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Grayscale converter that processes 8 values per step, using AVX/AVX2 intrinsics when
        /// SUPPORTS_RUNTIME_INTRINSICS is defined and <see cref="Vector{T}"/> otherwise.
        /// </summary>
        internal sealed class FromGrayscaleVector8 : JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromGrayscaleVector8"/> class.
            /// </summary>
            /// <param name="precision">The precision, forwarded to the base constructor.</param>
            public FromGrayscaleVector8(int precision)
                : base(JpegColorSpace.Grayscale, precision)
            {
            }

            /// <summary>
            /// Gets a value indicating whether this converter can be used on the current machine.
            /// </summary>
            public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;

            /// <summary>
            /// Converts the largest prefix whose length is a multiple of 8 with the vectorized
            /// <see cref="ConvertCore"/> and the remaining tail with <see cref="FromGrayscaleBasic"/>.
            /// </summary>
            /// <param name="values">The component rows to convert.</param>
            /// <param name="result">The destination span.</param>
            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
                int remainder = result.Length % 8;
                int simdCount = result.Length - remainder;
                if (simdCount > 0)
                {
                    ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue);
                }

                FromGrayscaleBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue);
            }

            /// <summary>
            /// Vectorized conversion of <c>result.Length / 8</c> groups of 8 values.
            /// </summary>
            /// <param name="values">Component rows; only Component0 is read.</param>
            /// <param name="result">The destination span.</param>
            /// <param name="maxValue">The maximum component value; inputs are multiplied by its reciprocal.</param>
            /// <exception cref="InvalidOperationException">Thrown when <see cref="IsAvailable"/> is false.</exception>
            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
            {
                // This implementation is actually AVX specific.
                // An AVX register is capable of storing 8 float-s.
                if (!IsAvailable)
                {
                    // Fixed: the message previously spoke of "256 byte" registers; 8 floats are 256 bits.
                    throw new InvalidOperationException(
                        "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 bit floating point SIMD registers!");
                }

#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> gBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));

                ref Vector256<float> resultBase =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Used for the color conversion
                var scale = Vector256.Create(1 / maxValue);
                var one = Vector256.Create(1F);

                // Used for packing
                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

                int n = result.Length / 8;
                for (int i = 0; i < n; i++)
                {
                    Vector256<float> g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol);

                    g = Avx.Multiply(g, scale);

                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

                    // Replicate one element across each 128-bit half, then set elements 3 and 7 (W) to 1.
                    destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000);
                    Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000);
                    Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000);
                    Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000);
                }
#else
                ref Vector<float> gBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));

                ref Vector4Octet resultBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Scratch pair, written through the Vector<float> view below and then handed to Pack.
                Vector4Pair gg = default;
                ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);

                var scale = new Vector<float>(1 / maxValue);

                // Walking 8 elements at one step:
                int n = result.Length / 8;
                for (int i = 0; i < n; i++)
                {
                    Vector<float> g = Unsafe.Add(ref gBase, i);
                    g *= scale;

                    ggRefAsVector = g;

                    // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order:
                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
                    destination.Pack(ref gg);
                }
#endif
            }
        }
    }
}
|||
@ -0,0 +1,53 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.Tuples; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Grayscale converter whose vectorized path is written against <see cref="Vector{T}"/>.
        /// </summary>
        internal sealed class FromGrayscaleVector8 : Vector8JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromGrayscaleVector8"/> class.
            /// </summary>
            /// <param name="precision">The precision, forwarded to the base constructor.</param>
            public FromGrayscaleVector8(int precision)
                : base(JpegColorSpace.Grayscale, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of 8 values: each input is multiplied by
            /// <c>1 / MaximumValue</c> and packed into the destination.
            /// Elements beyond the last full group of 8 are not touched here.
            /// </summary>
            /// <param name="values">Component rows; only Component0 is read.</param>
            /// <param name="result">Destination span, written one <see cref="Vector4Octet"/> per group.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
                ref Vector<float> grayRef =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));

                ref Vector4Octet targetRef =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Scratch pair: filled through a Vector<float> view, then handed to Pack.
                Vector4Pair grayPair = default;
                ref Vector<float> grayView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref grayPair);

                Vector<float> invMax = new Vector<float>(1 / this.MaximumValue);

                // One group of 8 elements per iteration.
                int groups = result.Length / 8;
                for (int g = 0; g < groups; g++)
                {
                    grayView = Unsafe.Add(ref grayRef, g) * invMax;

                    // Pack (g0,g1...g7) into the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order.
                    ref Vector4Octet target = ref Unsafe.Add(ref targetRef, g);
                    target.Pack(ref grayPair);
                }
            }

            /// <summary>
            /// Non-vectorized conversion; delegates to <see cref="FromGrayscaleBasic"/> with this converter's maximum value.
            /// </summary>
            /// <param name="values">The component rows to convert.</param>
            /// <param name="result">The destination span.</param>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue);
            }
        }
    }
}
|||
@ -0,0 +1,72 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using static SixLabors.ImageSharp.SimdUtils; |
|||
#endif
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// RGB converter whose vectorized path is written with AVX/AVX2 intrinsics.
        /// </summary>
        internal sealed class FromRgbAvx2 : Avx2JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromRgbAvx2"/> class.
            /// </summary>
            /// <param name="precision">The precision, forwarded to the base constructor.</param>
            public FromRgbAvx2(int precision)
                : base(JpegColorSpace.RGB, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of 8 values: every component is multiplied by
            /// <c>1 / MaximumValue</c> and interleaved with a constant 1 as the fourth lane.
            /// Elements beyond the last full group of 8 are not touched here.
            /// The body is compiled only when SUPPORTS_RUNTIME_INTRINSICS is defined.
            /// </summary>
            /// <param name="values">Component rows: Component0..Component2 are read as r, g, b.</param>
            /// <param name="result">Destination span, written as four 256-bit vectors per group.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> redRef =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> greenRef =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> blueRef =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));

                ref Vector256<float> targetRef =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Constants of the color math: the normalization factor and the constant W lane.
                Vector256<float> invMax = Vector256.Create(1 / this.MaximumValue);
                Vector256<float> alpha = Vector256.Create(1F);

                // Lane permutation applied to every loaded vector before interleaving.
                // NOTE(review): presumably chosen so the unpack/shuffle sequence below emits pixels in source order - verify against the mask definition.
                ref byte maskStart = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> laneOrder = Unsafe.As<byte, Vector256<int>>(ref maskStart);

                int groups = result.Length / 8;
                for (int i = 0; i < groups; i++)
                {
                    Vector256<float> r = Avx2.PermuteVar8x32(Unsafe.Add(ref redRef, i), laneOrder);
                    Vector256<float> g = Avx2.PermuteVar8x32(Unsafe.Add(ref greenRef, i), laneOrder);
                    Vector256<float> b = Avx2.PermuteVar8x32(Unsafe.Add(ref blueRef, i), laneOrder);

                    r = Avx.Multiply(r, invMax);
                    g = Avx.Multiply(g, invMax);
                    b = Avx.Multiply(b, invMax);

                    // Interleave (r, g) and (b, 1) pairs, then combine them into four-component vectors.
                    Vector256<float> lowerRg = Avx.UnpackLow(r, g);
                    Vector256<float> upperRg = Avx.UnpackHigh(r, g);
                    Vector256<float> lowerB1 = Avx.UnpackLow(b, alpha);
                    Vector256<float> upperB1 = Avx.UnpackHigh(b, alpha);

                    ref Vector256<float> target = ref Unsafe.Add(ref targetRef, i * 4);

                    target = Avx.Shuffle(lowerRg, lowerB1, 0b01_00_01_00);
                    Unsafe.Add(ref target, 1) = Avx.Shuffle(lowerRg, lowerB1, 0b11_10_11_10);
                    Unsafe.Add(ref target, 2) = Avx.Shuffle(upperRg, upperB1, 0b01_00_01_00);
                    Unsafe.Add(ref target, 3) = Avx.Shuffle(upperRg, upperB1, 0b11_10_11_10);
                }
#endif
            }

            /// <summary>
            /// Non-vectorized conversion; delegates to <see cref="FromRgbBasic"/> with this converter's maximum value.
            /// </summary>
            /// <param name="values">The component rows to convert.</param>
            /// <param name="result">The destination span.</param>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromRgbBasic.ConvertCore(values, result, this.MaximumValue);
            }
        }
    }
}
|||
@ -1,132 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using static SixLabors.ImageSharp.SimdUtils; |
|||
#else
|
|||
using SixLabors.ImageSharp.Tuples; |
|||
#endif
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// RGB converter that processes 8 values per step, using AVX/AVX2 intrinsics when
        /// SUPPORTS_RUNTIME_INTRINSICS is defined and <see cref="Vector{T}"/> otherwise.
        /// </summary>
        internal sealed class FromRgbVector8 : JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromRgbVector8"/> class.
            /// </summary>
            /// <param name="precision">The precision, forwarded to the base constructor.</param>
            public FromRgbVector8(int precision)
                : base(JpegColorSpace.RGB, precision)
            {
            }

            /// <summary>
            /// Gets a value indicating whether this converter can be used on the current machine.
            /// </summary>
            public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;

            /// <summary>
            /// Converts the largest prefix whose length is a multiple of 8 with the vectorized
            /// <see cref="ConvertCore"/> and the remaining tail with <see cref="FromRgbBasic"/>.
            /// </summary>
            /// <param name="values">The component rows to convert.</param>
            /// <param name="result">The destination span.</param>
            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
                int remainder = result.Length % 8;
                int simdCount = result.Length - remainder;
                if (simdCount > 0)
                {
                    ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue);
                }

                FromRgbBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue);
            }

            /// <summary>
            /// Vectorized conversion of <c>result.Length / 8</c> groups of 8 values.
            /// </summary>
            /// <param name="values">Component rows: Component0..Component2 are read as r, g, b.</param>
            /// <param name="result">The destination span.</param>
            /// <param name="maxValue">The maximum component value; inputs are multiplied by its reciprocal.</param>
            /// <exception cref="InvalidOperationException">Thrown when <see cref="IsAvailable"/> is false.</exception>
            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
            {
                // This implementation is actually AVX specific.
                // An AVX register is capable of storing 8 float-s.
                if (!IsAvailable)
                {
                    // Fixed: the message previously named FromGrayscaleVector8 and spoke of "256 byte" registers.
                    throw new InvalidOperationException(
                        "JpegColorConverter.FromRgbVector8 can be used only on architecture having 256 bit floating point SIMD registers!");
                }

#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> rBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> gBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> bBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));

                ref Vector256<float> resultBase =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Used for the color conversion
                var scale = Vector256.Create(1 / maxValue);
                var one = Vector256.Create(1F);

                // Used for packing
                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

                int n = result.Length / 8;
                for (int i = 0; i < n; i++)
                {
                    Vector256<float> r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale);
                    Vector256<float> g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale);
                    Vector256<float> b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale);

                    // Interleave (r, g) and (b, 1) pairs, then combine them into four-component vectors.
                    Vector256<float> rgLo = Avx.UnpackLow(r, g);
                    Vector256<float> boLo = Avx.UnpackLow(b, one);
                    Vector256<float> rgHi = Avx.UnpackHigh(r, g);
                    Vector256<float> boHi = Avx.UnpackHigh(b, one);

                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

                    destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00);
                    Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10);
                    Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00);
                    Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10);
                }
#else
                ref Vector<float> rBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> gBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector<float> bBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));

                ref Vector4Octet resultBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Scratch pairs, written through the Vector<float> views below and then handed to Pack.
                Vector4Pair rr = default;
                Vector4Pair gg = default;
                Vector4Pair bb = default;
                ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
                ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
                ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);

                var scale = new Vector<float>(1 / maxValue);

                // Walking 8 elements at one step:
                int n = result.Length / 8;
                for (int i = 0; i < n; i++)
                {
                    Vector<float> r = Unsafe.Add(ref rBase, i);
                    Vector<float> g = Unsafe.Add(ref gBase, i);
                    Vector<float> b = Unsafe.Add(ref bBase, i);
                    r *= scale;
                    g *= scale;
                    b *= scale;

                    rrRefAsVector = r;
                    ggRefAsVector = g;
                    bbRefAsVector = b;

                    // Collect the (r0..r7) (g0..g7) (b0..b7) vector values into the destination octet.
                    // NOTE(review): expected layout is presumably (r0,g0,b0,1), (r1,g1,b1,1) ... - confirm against Vector4Octet.Pack.
                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
                    destination.Pack(ref rr, ref gg, ref bb);
                }
#endif
            }
        }
    }
}
|||
@ -0,0 +1,67 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.Tuples; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// RGB converter whose vectorized path is written against <see cref="Vector{T}"/>.
        /// </summary>
        internal sealed class FromRgbVector8 : Vector8JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromRgbVector8"/> class.
            /// </summary>
            /// <param name="precision">The precision, forwarded to the base constructor.</param>
            public FromRgbVector8(int precision)
                : base(JpegColorSpace.RGB, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of 8 values: every component is multiplied by
            /// <c>1 / MaximumValue</c> and packed into the destination.
            /// Elements beyond the last full group of 8 are not touched here.
            /// </summary>
            /// <param name="values">Component rows: Component0..Component2 are read as r, g, b.</param>
            /// <param name="result">Destination span, written one <see cref="Vector4Octet"/> per group.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
                ref Vector<float> redRef =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> greenRef =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector<float> blueRef =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));

                ref Vector4Octet targetRef =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Scratch pairs: filled through a Vector<float> view, then handed to Pack.
                Vector4Pair redPair = default;
                Vector4Pair greenPair = default;
                Vector4Pair bluePair = default;
                ref Vector<float> redView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref redPair);
                ref Vector<float> greenView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref greenPair);
                ref Vector<float> blueView = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bluePair);

                Vector<float> invMax = new Vector<float>(1 / this.MaximumValue);

                // One group of 8 elements per iteration.
                int groups = result.Length / 8;
                for (int i = 0; i < groups; i++)
                {
                    redView = Unsafe.Add(ref redRef, i) * invMax;
                    greenView = Unsafe.Add(ref greenRef, i) * invMax;
                    blueView = Unsafe.Add(ref blueRef, i) * invMax;

                    // Pack the three 8-wide channel vectors into the destination octet.
                    // NOTE(review): expected layout is presumably (r0,g0,b0,1), (r1,g1,b1,1) ... - confirm against Vector4Octet.Pack.
                    ref Vector4Octet target = ref Unsafe.Add(ref targetRef, i);
                    target.Pack(ref redPair, ref greenPair, ref bluePair);
                }
            }

            /// <summary>
            /// Non-vectorized conversion; delegates to <see cref="FromRgbBasic"/> with this converter's maximum value.
            /// </summary>
            /// <param name="values">The component rows to convert.</param>
            /// <param name="result">The destination span.</param>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromRgbBasic.ConvertCore(values, result, this.MaximumValue);
            }
        }
    }
}
|||
@ -0,0 +1,101 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using static SixLabors.ImageSharp.SimdUtils; |
|||
#endif
|
|||
|
|||
// ReSharper disable ImpureMethodCallOnReadonlyValueField
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// YCbCr converter built on AVX/AVX2 intrinsics, handling eight pixels per step and
        /// delegating any leftover pixels to <c>FromYCbCrBasic</c>.
        /// </summary>
        internal sealed class FromYCbCrAvx2 : Avx2JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromYCbCrAvx2"/> class.
            /// </summary>
            /// <param name="precision">The precision value forwarded to the base converter.</param>
            public FromYCbCrAvx2(int precision)
                : base(JpegColorSpace.YCbCr, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of eight YCbCr samples and writes them
            /// interleaved as (r, g, b, 1) into <paramref name="result"/>.
            /// The body is compiled only when SUPPORTS_RUNTIME_INTRINSICS is defined.
            /// </summary>
            /// <param name="values">The planar component values (Component0..2 are read).</param>
            /// <param name="result">The destination pixel span.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> lumaBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> cbBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> crBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));

                // Each Vector256<float> of output holds two Vector4 pixels.
                ref Vector256<float> outputBase =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Colour conversion constants.
                Vector256<float> chromaBias = Vector256.Create(-this.HalfValue);
                Vector256<float> normalize = Vector256.Create(1 / this.MaximumValue);
                Vector256<float> crToRed = Vector256.Create(1.402F);
                Vector256<float> cbToGreen = Vector256.Create(-0.344136F);
                Vector256<float> crToGreen = Vector256.Create(-0.714136F);
                Vector256<float> cbToBlue = Vector256.Create(1.772F);

                // Packing constants: the alpha lane and the element permutation mask.
                // NOTE(review): the mask presumably pre-orders elements so that the per-128-bit-lane
                // unpack operations below emit pixels sequentially - confirm against HwIntrinsics.
                Vector256<float> alpha = Vector256.Create(1F);
                ref byte maskStart = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> laneOrder = Unsafe.As<byte, Vector256<int>>(ref maskStart);

                // Eight elements are consumed per iteration.
                int blockCount = result.Length / 8;
                for (int block = 0; block < blockCount; block++)
                {
                    // y  = yVals[i]
                    // cb = cbVals[i] - HalfValue
                    // cr = crVals[i] - HalfValue
                    Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref lumaBase, block), laneOrder);
                    Vector256<float> cb = Avx2.PermuteVar8x32(
                        Avx.Add(Unsafe.Add(ref cbBase, block), chromaBias),
                        laneOrder);
                    Vector256<float> cr = Avx2.PermuteVar8x32(
                        Avx.Add(Unsafe.Add(ref crBase, block), chromaBias),
                        laneOrder);

                    // r = y + (1.402F * cr)
                    // g = y - (0.344136F * cb) - (0.714136F * cr)
                    // b = y + (1.772F * cb)
                    // MultiplyAdd(a, m0, m1) is presumably a + (m0 * m1) - verify against HwIntrinsics.
                    Vector256<float> red = HwIntrinsics.MultiplyAdd(y, cr, crToRed);
                    Vector256<float> greenPartial = HwIntrinsics.MultiplyAdd(y, cb, cbToGreen);
                    Vector256<float> green = HwIntrinsics.MultiplyAdd(greenPartial, cr, crToGreen);
                    Vector256<float> blue = HwIntrinsics.MultiplyAdd(y, cb, cbToBlue);

                    // Round to whole sample values, then scale by 1 / MaximumValue.
                    // TODO: We should be saving to RGBA not Vector4
                    red = Avx.Multiply(Avx.RoundToNearestInteger(red), normalize);
                    green = Avx.Multiply(Avx.RoundToNearestInteger(green), normalize);
                    blue = Avx.Multiply(Avx.RoundToNearestInteger(blue), normalize);

                    // Interleave (r, b) and (g, a), then interleave those again to obtain (r, g, b, a).
                    Vector256<float> redBlueLow = Avx.UnpackLow(red, blue);
                    Vector256<float> greenAlphaLow = Avx.UnpackLow(green, alpha);
                    Vector256<float> redBlueHigh = Avx.UnpackHigh(red, blue);
                    Vector256<float> greenAlphaHigh = Avx.UnpackHigh(green, alpha);

                    // Four output vectors (eight pixels) per block.
                    ref Vector256<float> target = ref Unsafe.Add(ref outputBase, block * 4);
                    target = Avx.UnpackLow(redBlueLow, greenAlphaLow);
                    Unsafe.Add(ref target, 1) = Avx.UnpackHigh(redBlueLow, greenAlphaLow);
                    Unsafe.Add(ref target, 2) = Avx.UnpackLow(redBlueHigh, greenAlphaHigh);
                    Unsafe.Add(ref target, 3) = Avx.UnpackHigh(redBlueHigh, greenAlphaHigh);
                }
#endif
            }

            /// <summary>
            /// Non-vectorized conversion; forwards to <c>FromYCbCrBasic.ConvertCore</c>.
            /// </summary>
            /// <param name="values">The planar component values.</param>
            /// <param name="result">The destination pixel span.</param>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
            }
        }
    }
}
|||
@ -1,183 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using static SixLabors.ImageSharp.SimdUtils; |
|||
#else
|
|||
using SixLabors.ImageSharp.Tuples; |
|||
#endif
|
|||
|
|||
// ReSharper disable ImpureMethodCallOnReadonlyValueField
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// YCbCr converter that processes eight pixels per step, using AVX/AVX2 intrinsics when
        /// SUPPORTS_RUNTIME_INTRINSICS is defined and <see cref="Vector{T}"/> arithmetic otherwise.
        /// </summary>
        internal sealed class FromYCbCrSimdVector8 : JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromYCbCrSimdVector8"/> class.
            /// </summary>
            /// <param name="precision">The precision value forwarded to the base converter.</param>
            public FromYCbCrSimdVector8(int precision)
                : base(JpegColorSpace.YCbCr, precision)
            {
            }

            /// <summary>
            /// Gets a value indicating whether this converter can be used on the current hardware.
            /// </summary>
            public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;

            /// <summary>
            /// Converts the largest prefix whose length is a multiple of 8 with the SIMD path and the
            /// remaining elements with <c>FromYCbCrBasic.ConvertCore</c>.
            /// </summary>
            /// <param name="values">The planar component values.</param>
            /// <param name="result">The destination pixel span.</param>
            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
                int remainder = result.Length % 8;
                int simdCount = result.Length - remainder;
                if (simdCount > 0)
                {
                    ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue);
                }

                FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue);
            }

            /// <summary>
            /// SIMD convert using buffers of sizes divisible by 8.
            /// </summary>
            /// <param name="values">The planar component values (Component0..2 are read).</param>
            /// <param name="result">The destination pixel span.</param>
            /// <param name="maxValue">The value the rounded results are divided by.</param>
            /// <param name="halfValue">The offset subtracted from the chroma components.</param>
            /// <exception cref="InvalidOperationException">Thrown when <see cref="IsAvailable"/> is false.</exception>
            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
            {
                // The implementation works on vectors of 8 floats, i.e. 256 bit registers.
                if (!IsAvailable)
                {
                    throw new InvalidOperationException(
                        "JpegColorConverter.FromYCbCrSimdVector8 can be used only on architecture having 256 bit floating point SIMD registers!");
                }

#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> yBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> cbBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> crBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));

                ref Vector256<float> resultBase =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Used for the color conversion
                var chromaOffset = Vector256.Create(-halfValue);
                var scale = Vector256.Create(1 / maxValue);
                var rCrMult = Vector256.Create(1.402F);
                var gCbMult = Vector256.Create(-0.344136F);
                var gCrMult = Vector256.Create(-0.714136F);
                var bCbMult = Vector256.Create(1.772F);

                // Used for packing.
                // NOTE(review): the mask presumably pre-orders elements so that the per-128-bit-lane
                // unpack operations below emit pixels sequentially - confirm against HwIntrinsics.
                var va = Vector256.Create(1F);
                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

                // Walking 8 elements at one step:
                int n = result.Length / 8;
                for (int i = 0; i < n; i++)
                {
                    // y = yVals[i];
                    // cb = cbVals[i] - halfValue;
                    // cr = crVals[i] - halfValue;
                    Vector256<float> y = Unsafe.Add(ref yBase, i);
                    Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
                    Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);

                    y = Avx2.PermuteVar8x32(y, vcontrol);
                    cb = Avx2.PermuteVar8x32(cb, vcontrol);
                    cr = Avx2.PermuteVar8x32(cr, vcontrol);

                    // r = y + (1.402F * cr);
                    // g = y - (0.344136F * cb) - (0.714136F * cr);
                    // b = y + (1.772F * cb);
                    // Adding & multiplying 8 elements at one time:
                    Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult);
                    Vector256<float> g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
                    Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);

                    // TODO: We should be saving to RGBA not Vector4
                    r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale);
                    g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale);
                    b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale);

                    // Interleave (r, b) with (g, 1) to obtain (r, g, b, 1) pixels.
                    Vector256<float> vte = Avx.UnpackLow(r, b);
                    Vector256<float> vto = Avx.UnpackLow(g, va);

                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

                    destination = Avx.UnpackLow(vte, vto);
                    Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);

                    vte = Avx.UnpackHigh(r, b);
                    vto = Avx.UnpackHigh(g, va);

                    Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
                    Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
                }
#else
                ref Vector<float> yBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> cbBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector<float> crBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));

                ref Vector4Octet resultBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Loop-invariant constants, created once.
                var chromaOffset = new Vector<float>(-halfValue);
                var scale = new Vector<float>(1 / maxValue);
                var rCrMult = new Vector<float>(1.402F);
                var gCbMult = new Vector<float>(0.344136F);
                var gCrMult = new Vector<float>(0.714136F);
                var bCbMult = new Vector<float>(1.772F);

                // Walking 8 elements at one step:
                int n = result.Length / 8;

                // Scratch pairs handed to Pack(); each is also addressable as one Vector<float>.
                Vector4Pair rr = default;
                Vector4Pair gg = default;
                Vector4Pair bb = default;

                ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
                ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
                ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);

                for (int i = 0; i < n; i++)
                {
                    // y = yVals[i];
                    // cb = cbVals[i] - halfValue;
                    // cr = crVals[i] - halfValue;
                    Vector<float> y = Unsafe.Add(ref yBase, i);
                    Vector<float> cb = Unsafe.Add(ref cbBase, i) + chromaOffset;
                    Vector<float> cr = Unsafe.Add(ref crBase, i) + chromaOffset;

                    // r = y + (1.402F * cr);
                    // g = y - (0.344136F * cb) - (0.714136F * cr);
                    // b = y + (1.772F * cb);
                    // Adding & multiplying 8 elements at one time:
                    Vector<float> r = y + (cr * rCrMult);
                    Vector<float> g = y - (cb * gCbMult) - (cr * gCrMult);
                    Vector<float> b = y + (cb * bCbMult);

                    r = r.FastRound();
                    g = g.FastRound();
                    b = b.FastRound();
                    r *= scale;
                    g *= scale;
                    b *= scale;

                    rrRefAsVector = r;
                    ggRefAsVector = g;
                    bbRefAsVector = b;

                    // Turn the planar (r0..r7) (g0..g7) (b0..b7) values into interleaved pixels.
                    // Presumably Pack emits (r, g, b, 1) per pixel - verify against Vector4Octet.Pack.
                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
                    destination.Pack(ref rr, ref gg, ref bb);
                }
#endif
            }
        }
    }
}
|||
@ -0,0 +1,87 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.Tuples; |
|||
|
|||
// ReSharper disable ImpureMethodCallOnReadonlyValueField
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// YCbCr converter that processes eight pixels per step using <see cref="Vector{T}"/>
        /// arithmetic and delegates any leftover pixels to <c>FromYCbCrBasic</c>.
        /// </summary>
        internal sealed class FromYCbCrVector8 : Vector8JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromYCbCrVector8"/> class.
            /// </summary>
            /// <param name="precision">The precision value forwarded to the base converter.</param>
            public FromYCbCrVector8(int precision)
                : base(JpegColorSpace.YCbCr, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of eight YCbCr samples to RGB, rounds them,
            /// scales them by <c>1 / MaximumValue</c> and packs them into <paramref name="result"/>.
            /// </summary>
            /// <param name="values">The planar component values (Component0..2 are read).</param>
            /// <param name="result">The destination pixel span.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
                // Reinterpret the three planes as sequences of Vector<float>.
                // NOTE(review): this relies on Vector<float>.Count == 8, which the Vector8 base class
                // is expected to guarantee through its availability check - confirm.
                ref Vector<float> lumaBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> cbBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector<float> crBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));

                // Destination viewed as groups of eight Vector4 values.
                ref Vector4Octet packedBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Scratch pairs handed to Pack(); each is also addressable as one Vector<float>.
                Vector4Pair redPair = default;
                Vector4Pair greenPair = default;
                Vector4Pair bluePair = default;
                ref Vector<float> redLanes = ref Unsafe.As<Vector4Pair, Vector<float>>(ref redPair);
                ref Vector<float> greenLanes = ref Unsafe.As<Vector4Pair, Vector<float>>(ref greenPair);
                ref Vector<float> blueLanes = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bluePair);

                // Conversion constants.
                Vector<float> chromaBias = new Vector<float>(-this.HalfValue);
                Vector<float> normalize = new Vector<float>(1 / this.MaximumValue);
                Vector<float> crToRed = new Vector<float>(1.402F);
                Vector<float> cbToGreen = new Vector<float>(0.344136F);
                Vector<float> crToGreen = new Vector<float>(0.714136F);
                Vector<float> cbToBlue = new Vector<float>(1.772F);

                // Eight elements are consumed per iteration.
                int blockCount = result.Length / 8;
                for (int block = 0; block < blockCount; block++)
                {
                    // y  = yVals[i]
                    // cb = cbVals[i] - HalfValue
                    // cr = crVals[i] - HalfValue
                    Vector<float> y = Unsafe.Add(ref lumaBase, block);
                    Vector<float> cb = Unsafe.Add(ref cbBase, block) + chromaBias;
                    Vector<float> cr = Unsafe.Add(ref crBase, block) + chromaBias;

                    // r = y + (1.402F * cr)
                    // g = y - (0.344136F * cb) - (0.714136F * cr)
                    // b = y + (1.772F * cb)
                    Vector<float> red = y + (cr * crToRed);
                    Vector<float> green = y - (cb * cbToGreen) - (cr * crToGreen);
                    Vector<float> blue = y + (cb * cbToBlue);

                    // Round first, then scale, and store straight into the scratch pairs.
                    redLanes = red.FastRound() * normalize;
                    greenLanes = green.FastRound() * normalize;
                    blueLanes = blue.FastRound() * normalize;

                    // Turn the planar (r0..r7) (g0..g7) (b0..b7) values into interleaved pixels.
                    // Presumably Pack emits (r, g, b, 1) per pixel - verify against Vector4Octet.Pack.
                    ref Vector4Octet target = ref Unsafe.Add(ref packedBase, block);
                    target.Pack(ref redPair, ref greenPair, ref bluePair);
                }
            }

            /// <summary>
            /// Non-vectorized conversion; forwards to <c>FromYCbCrBasic.ConvertCore</c>.
            /// </summary>
            /// <param name="values">The planar component values.</param>
            /// <param name="result">The destination pixel span.</param>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
            }
        }
    }
}
|||
@ -0,0 +1,110 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using static SixLabors.ImageSharp.SimdUtils; |
|||
#endif
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// YccK converter built on AVX/AVX2 intrinsics, handling eight pixels per step and
        /// delegating any leftover pixels to <c>FromYccKBasic</c>.
        /// </summary>
        internal sealed class FromYccKAvx2 : Avx2JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromYccKAvx2"/> class.
            /// </summary>
            /// <param name="precision">The precision value forwarded to the base converter.</param>
            public FromYccKAvx2(int precision)
                : base(JpegColorSpace.Ycck, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of eight YccK samples and writes them
            /// interleaved as (r, g, b, 1) into <paramref name="result"/>.
            /// The body is compiled only when SUPPORTS_RUNTIME_INTRINSICS is defined.
            /// </summary>
            /// <param name="values">The planar component values (Component0..3 are read).</param>
            /// <param name="result">The destination pixel span.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> lumaBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> cbBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> crBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector256<float> blackBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));

                // Each Vector256<float> of output holds two Vector4 pixels.
                ref Vector256<float> outputBase =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Colour conversion constants.
                Vector256<float> chromaBias = Vector256.Create(-this.HalfValue);
                Vector256<float> normalize = Vector256.Create(1 / this.MaximumValue);
                Vector256<float> ceiling = Vector256.Create(this.MaximumValue);
                Vector256<float> crToRed = Vector256.Create(1.402F);
                Vector256<float> cbToGreen = Vector256.Create(-0.344136F);
                Vector256<float> crToGreen = Vector256.Create(-0.714136F);
                Vector256<float> cbToBlue = Vector256.Create(1.772F);

                // Packing constants: the alpha lane and the element permutation mask.
                // NOTE(review): the mask presumably pre-orders elements so that the per-128-bit-lane
                // unpack operations below emit pixels sequentially - confirm against HwIntrinsics.
                Vector256<float> alpha = Vector256.Create(1F);
                ref byte maskStart = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> laneOrder = Unsafe.As<byte, Vector256<int>>(ref maskStart);

                // Eight elements are consumed per iteration.
                int blockCount = result.Length / 8;
                for (int block = 0; block < blockCount; block++)
                {
                    // y  = yVals[i]
                    // cb = cbVals[i] - HalfValue
                    // cr = crVals[i] - HalfValue
                    // k  = kVals[i] / MaximumValue
                    Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref lumaBase, block), laneOrder);
                    Vector256<float> cb = Avx2.PermuteVar8x32(
                        Avx.Add(Unsafe.Add(ref cbBase, block), chromaBias),
                        laneOrder);
                    Vector256<float> cr = Avx2.PermuteVar8x32(
                        Avx.Add(Unsafe.Add(ref crBase, block), chromaBias),
                        laneOrder);
                    Vector256<float> k = Avx2.PermuteVar8x32(
                        Avx.Divide(Unsafe.Add(ref blackBase, block), ceiling),
                        laneOrder);

                    // r = y + (1.402F * cr)
                    // g = y - (0.344136F * cb) - (0.714136F * cr)
                    // b = y + (1.772F * cb)
                    // MultiplyAdd(a, m0, m1) is presumably a + (m0 * m1) - verify against HwIntrinsics.
                    Vector256<float> red = HwIntrinsics.MultiplyAdd(y, cr, crToRed);
                    Vector256<float> greenPartial = HwIntrinsics.MultiplyAdd(y, cb, cbToGreen);
                    Vector256<float> green = HwIntrinsics.MultiplyAdd(greenPartial, cr, crToGreen);
                    Vector256<float> blue = HwIntrinsics.MultiplyAdd(y, cb, cbToBlue);

                    // Invert against MaximumValue after rounding ...
                    red = Avx.Subtract(ceiling, Avx.RoundToNearestInteger(red));
                    green = Avx.Subtract(ceiling, Avx.RoundToNearestInteger(green));
                    blue = Avx.Subtract(ceiling, Avx.RoundToNearestInteger(blue));

                    // ... then apply k and scale by 1 / MaximumValue.
                    red = Avx.Multiply(Avx.Multiply(red, k), normalize);
                    green = Avx.Multiply(Avx.Multiply(green, k), normalize);
                    blue = Avx.Multiply(Avx.Multiply(blue, k), normalize);

                    // Interleave (r, b) and (g, a), then interleave those again to obtain (r, g, b, a).
                    Vector256<float> redBlueLow = Avx.UnpackLow(red, blue);
                    Vector256<float> greenAlphaLow = Avx.UnpackLow(green, alpha);
                    Vector256<float> redBlueHigh = Avx.UnpackHigh(red, blue);
                    Vector256<float> greenAlphaHigh = Avx.UnpackHigh(green, alpha);

                    // Four output vectors (eight pixels) per block.
                    ref Vector256<float> target = ref Unsafe.Add(ref outputBase, block * 4);
                    target = Avx.UnpackLow(redBlueLow, greenAlphaLow);
                    Unsafe.Add(ref target, 1) = Avx.UnpackHigh(redBlueLow, greenAlphaLow);
                    Unsafe.Add(ref target, 2) = Avx.UnpackLow(redBlueHigh, greenAlphaHigh);
                    Unsafe.Add(ref target, 3) = Avx.UnpackHigh(redBlueHigh, greenAlphaHigh);
                }
#endif
            }

            /// <summary>
            /// Non-vectorized conversion; forwards to <c>FromYccKBasic.ConvertCore</c>.
            /// </summary>
            /// <param name="values">The planar component values.</param>
            /// <param name="result">The destination pixel span.</param>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
            }
        }
    }
}
|||
@ -1,193 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using static SixLabors.ImageSharp.SimdUtils; |
|||
#else
|
|||
using SixLabors.ImageSharp.Tuples; |
|||
#endif
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// YccK converter that processes eight pixels per step, using AVX/AVX2 intrinsics when
        /// SUPPORTS_RUNTIME_INTRINSICS is defined and <see cref="Vector{T}"/> arithmetic otherwise.
        /// </summary>
        internal sealed class FromYccKVector8 : JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromYccKVector8"/> class.
            /// </summary>
            /// <param name="precision">The precision value forwarded to the base converter.</param>
            public FromYccKVector8(int precision)
                : base(JpegColorSpace.Ycck, precision)
            {
            }

            /// <summary>
            /// Gets a value indicating whether this converter can be used on the current hardware.
            /// </summary>
            public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;

            /// <summary>
            /// Converts the largest prefix whose length is a multiple of 8 with the SIMD path and the
            /// remaining elements with <c>FromYccKBasic.ConvertCore</c>.
            /// </summary>
            /// <param name="values">The planar component values.</param>
            /// <param name="result">The destination pixel span.</param>
            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
                int remainder = result.Length % 8;
                int simdCount = result.Length - remainder;
                if (simdCount > 0)
                {
                    ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue);
                }

                FromYccKBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue);
            }

            /// <summary>
            /// SIMD convert; processes <c>result.Length / 8</c> groups of eight elements.
            /// </summary>
            /// <param name="values">The planar component values (Component0..3 are read).</param>
            /// <param name="result">The destination pixel span.</param>
            /// <param name="maxValue">The value used to normalize k and the final results.</param>
            /// <param name="halfValue">The offset subtracted from the chroma components.</param>
            /// <exception cref="InvalidOperationException">Thrown when <see cref="IsAvailable"/> is false.</exception>
            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
            {
                // The implementation works on vectors of 8 floats, i.e. 256 bit registers.
                if (!IsAvailable)
                {
                    throw new InvalidOperationException(
                        "JpegColorConverter.FromYccKVector8 can be used only on architecture having 256 bit floating point SIMD registers!");
                }

#if SUPPORTS_RUNTIME_INTRINSICS
                ref Vector256<float> yBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector256<float> cbBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector256<float> crBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector256<float> kBase =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));

                ref Vector256<float> resultBase =
                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));

                // Used for the color conversion
                var chromaOffset = Vector256.Create(-halfValue);
                var scale = Vector256.Create(1 / maxValue);
                var max = Vector256.Create(maxValue);
                var rCrMult = Vector256.Create(1.402F);
                var gCbMult = Vector256.Create(-0.344136F);
                var gCrMult = Vector256.Create(-0.714136F);
                var bCbMult = Vector256.Create(1.772F);

                // Used for packing.
                // NOTE(review): the mask presumably pre-orders elements so that the per-128-bit-lane
                // unpack operations below emit pixels sequentially - confirm against HwIntrinsics.
                var va = Vector256.Create(1F);
                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);

                // Walking 8 elements at one step:
                int n = result.Length / 8;
                for (int i = 0; i < n; i++)
                {
                    // y = yVals[i];
                    // cb = cbVals[i] - halfValue;
                    // cr = crVals[i] - halfValue;
                    // k = kVals[i] / maxValue;
                    Vector256<float> y = Unsafe.Add(ref yBase, i);
                    Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
                    Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
                    Vector256<float> k = Avx.Divide(Unsafe.Add(ref kBase, i), max);

                    y = Avx2.PermuteVar8x32(y, vcontrol);
                    cb = Avx2.PermuteVar8x32(cb, vcontrol);
                    cr = Avx2.PermuteVar8x32(cr, vcontrol);
                    k = Avx2.PermuteVar8x32(k, vcontrol);

                    // r = y + (1.402F * cr);
                    // g = y - (0.344136F * cb) - (0.714136F * cr);
                    // b = y + (1.772F * cb);
                    // Adding & multiplying 8 elements at one time:
                    Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult);
                    Vector256<float> g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
                    Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);

                    // Invert against maxValue after rounding, then apply k and normalize.
                    r = Avx.Subtract(max, Avx.RoundToNearestInteger(r));
                    g = Avx.Subtract(max, Avx.RoundToNearestInteger(g));
                    b = Avx.Subtract(max, Avx.RoundToNearestInteger(b));

                    r = Avx.Multiply(Avx.Multiply(r, k), scale);
                    g = Avx.Multiply(Avx.Multiply(g, k), scale);
                    b = Avx.Multiply(Avx.Multiply(b, k), scale);

                    // Interleave (r, b) with (g, 1) to obtain (r, g, b, 1) pixels.
                    Vector256<float> vte = Avx.UnpackLow(r, b);
                    Vector256<float> vto = Avx.UnpackLow(g, va);

                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);

                    destination = Avx.UnpackLow(vte, vto);
                    Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);

                    vte = Avx.UnpackHigh(r, b);
                    vto = Avx.UnpackHigh(g, va);

                    Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
                    Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
                }
#else
                ref Vector<float> yBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> cbBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector<float> crBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector<float> kBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));

                ref Vector4Octet resultBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Loop-invariant constants, created once.
                var chromaOffset = new Vector<float>(-halfValue);
                var scale = new Vector<float>(1 / maxValue);
                var max = new Vector<float>(maxValue);
                var rCrMult = new Vector<float>(1.402F);
                var gCbMult = new Vector<float>(0.344136F);
                var gCrMult = new Vector<float>(0.714136F);
                var bCbMult = new Vector<float>(1.772F);

                // Walking 8 elements at one step:
                int n = result.Length / 8;

                // Scratch pairs handed to Pack(); each is also addressable as one Vector<float>.
                Vector4Pair rr = default;
                Vector4Pair gg = default;
                Vector4Pair bb = default;

                ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
                ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
                ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);

                for (int i = 0; i < n; i++)
                {
                    // y = yVals[i];
                    // cb = cbVals[i] - halfValue;
                    // cr = crVals[i] - halfValue;
                    // k = kVals[i] / maxValue;
                    Vector<float> y = Unsafe.Add(ref yBase, i);
                    Vector<float> cb = Unsafe.Add(ref cbBase, i) + chromaOffset;
                    Vector<float> cr = Unsafe.Add(ref crBase, i) + chromaOffset;
                    Vector<float> k = Unsafe.Add(ref kBase, i) / max;

                    // r = y + (1.402F * cr);
                    // g = y - (0.344136F * cb) - (0.714136F * cr);
                    // b = y + (1.772F * cb);
                    // Adding & multiplying 8 elements at one time:
                    Vector<float> r = y + (cr * rCrMult);
                    Vector<float> g = y - (cb * gCbMult) - (cr * gCrMult);
                    Vector<float> b = y + (cb * bCbMult);

                    r = (max - r.FastRound()) * k;
                    g = (max - g.FastRound()) * k;
                    b = (max - b.FastRound()) * k;
                    r *= scale;
                    g *= scale;
                    b *= scale;

                    rrRefAsVector = r;
                    ggRefAsVector = g;
                    bbRefAsVector = b;

                    // Turn the planar (r0..r7) (g0..g7) (b0..b7) values into interleaved pixels.
                    // Presumably Pack emits (r, g, b, 1) per pixel - verify against Vector4Octet.Pack.
                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
                    destination.Pack(ref rr, ref gg, ref bb);
                }
#endif
            }
        }
    }
}
|||
@ -0,0 +1,91 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using SixLabors.ImageSharp.Tuples; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// YccK converter that processes eight pixels per step using <see cref="Vector{T}"/>
        /// arithmetic and delegates any leftover pixels to <c>FromYccKBasic</c>.
        /// </summary>
        internal sealed class FromYccKVector8 : Vector8JpegColorConverter
        {
            /// <summary>
            /// Initializes a new instance of the <see cref="FromYccKVector8"/> class.
            /// </summary>
            /// <param name="precision">The precision value forwarded to the base converter.</param>
            public FromYccKVector8(int precision)
                : base(JpegColorSpace.Ycck, precision)
            {
            }

            /// <summary>
            /// Converts <c>result.Length / 8</c> groups of eight YccK samples: computes RGB from YCbCr,
            /// inverts the rounded values against <c>MaximumValue</c>, multiplies by the normalized k
            /// component, scales by <c>1 / MaximumValue</c> and packs into <paramref name="result"/>.
            /// </summary>
            /// <param name="values">The planar component values (Component0..3 are read).</param>
            /// <param name="result">The destination pixel span.</param>
            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
                // Reinterpret the four planes as sequences of Vector<float>.
                // NOTE(review): this relies on Vector<float>.Count == 8, which the Vector8 base class
                // is expected to guarantee through its availability check - confirm.
                ref Vector<float> lumaBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> cbBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
                ref Vector<float> crBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
                ref Vector<float> blackBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));

                // Destination viewed as groups of eight Vector4 values.
                ref Vector4Octet packedBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

                // Scratch pairs handed to Pack(); each is also addressable as one Vector<float>.
                Vector4Pair redPair = default;
                Vector4Pair greenPair = default;
                Vector4Pair bluePair = default;
                ref Vector<float> redLanes = ref Unsafe.As<Vector4Pair, Vector<float>>(ref redPair);
                ref Vector<float> greenLanes = ref Unsafe.As<Vector4Pair, Vector<float>>(ref greenPair);
                ref Vector<float> blueLanes = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bluePair);

                // Conversion constants.
                Vector<float> chromaBias = new Vector<float>(-this.HalfValue);
                Vector<float> normalize = new Vector<float>(1 / this.MaximumValue);
                Vector<float> ceiling = new Vector<float>(this.MaximumValue);
                Vector<float> crToRed = new Vector<float>(1.402F);
                Vector<float> cbToGreen = new Vector<float>(0.344136F);
                Vector<float> crToGreen = new Vector<float>(0.714136F);
                Vector<float> cbToBlue = new Vector<float>(1.772F);

                // Eight elements are consumed per iteration.
                int blockCount = result.Length / 8;
                for (int block = 0; block < blockCount; block++)
                {
                    // y  = yVals[i]
                    // cb = cbVals[i] - HalfValue
                    // cr = crVals[i] - HalfValue
                    // k  = kVals[i] / MaximumValue
                    Vector<float> y = Unsafe.Add(ref lumaBase, block);
                    Vector<float> cb = Unsafe.Add(ref cbBase, block) + chromaBias;
                    Vector<float> cr = Unsafe.Add(ref crBase, block) + chromaBias;
                    Vector<float> k = Unsafe.Add(ref blackBase, block) / ceiling;

                    // r = y + (1.402F * cr)
                    // g = y - (0.344136F * cb) - (0.714136F * cr)
                    // b = y + (1.772F * cb)
                    Vector<float> red = y + (cr * crToRed);
                    Vector<float> green = y - (cb * cbToGreen) - (cr * crToGreen);
                    Vector<float> blue = y + (cb * cbToBlue);

                    // Invert the rounded value, apply k, then scale; stored straight into the scratch pairs.
                    redLanes = ((ceiling - red.FastRound()) * k) * normalize;
                    greenLanes = ((ceiling - green.FastRound()) * k) * normalize;
                    blueLanes = ((ceiling - blue.FastRound()) * k) * normalize;

                    // Turn the planar (r0..r7) (g0..g7) (b0..b7) values into interleaved pixels.
                    // Presumably Pack emits (r, g, b, 1) per pixel - verify against Vector4Octet.Pack.
                    ref Vector4Octet target = ref Unsafe.Add(ref packedBase, block);
                    target.Pack(ref redPair, ref greenPair, ref bluePair);
                }
            }

            /// <summary>
            /// Non-vectorized conversion; forwards to <c>FromYccKBasic.ConvertCore</c>.
            /// </summary>
            /// <param name="values">The planar component values.</param>
            /// <param name="result">The destination pixel span.</param>
            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result)
            {
                FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
            }
        }
    }
}
|||
@ -0,0 +1,18 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Base class for vectorized converters that work on groups of eight elements and report
        /// their availability through <c>SimdUtils.HasVector8</c>.
        /// </summary>
        internal abstract class Vector8JpegColorConverter : VectorizedJpegColorConverter
        {
            /// <summary>
            /// The number of elements processed per vector step, passed to the base class.
            /// </summary>
            private const int ElementsPerStep = 8;

            /// <summary>
            /// Initializes a new instance of the <see cref="Vector8JpegColorConverter"/> class.
            /// </summary>
            /// <param name="colorSpace">The color space forwarded to the base converter.</param>
            /// <param name="precision">The precision value forwarded to the base converter.</param>
            protected Vector8JpegColorConverter(JpegColorSpace colorSpace, int precision)
                : base(colorSpace, precision, ElementsPerStep)
            {
            }

            /// <summary>
            /// Gets a value indicating whether the converter can be used; mirrors <c>SimdUtils.HasVector8</c>.
            /// </summary>
            protected sealed override bool IsAvailable
            {
                get { return SimdUtils.HasVector8; }
            }
        }
    }
}
|||
@ -0,0 +1,46 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
    internal abstract partial class JpegColorConverter
    {
        /// <summary>
        /// Base class for converters that split the input into a vectorized prefix, whose length is a
        /// multiple of the vector size, and a remainder handled by a non-vectorized routine.
        /// </summary>
        internal abstract class VectorizedJpegColorConverter : JpegColorConverter
        {
            // Number of elements processed per vectorized step.
            private readonly int vectorSize;

            /// <summary>
            /// Initializes a new instance of the <see cref="VectorizedJpegColorConverter"/> class.
            /// </summary>
            /// <param name="colorSpace">The color space forwarded to the base converter.</param>
            /// <param name="precision">The precision value forwarded to the base converter.</param>
            /// <param name="vectorSize">The number of elements processed per vectorized step.</param>
            protected VectorizedJpegColorConverter(JpegColorSpace colorSpace, int precision, int vectorSize)
                : base(colorSpace, precision)
            {
                this.vectorSize = vectorSize;
            }

            /// <summary>
            /// Converts the largest prefix of <paramref name="result"/> whose length is a multiple of the
            /// vector size with <see cref="ConvertCoreVectorized"/>, then the remaining elements with
            /// <see cref="ConvertCore"/>.
            /// </summary>
            /// <param name="values">The planar component values.</param>
            /// <param name="result">The destination pixel span.</param>
            /// <exception cref="InvalidOperationException">
            /// Thrown when at least one full vector has to be processed but the converter reports
            /// that it is not available.
            /// </exception>
            public sealed override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
                int remainder = result.Length % this.vectorSize;
                int simdCount = result.Length - remainder;
                if (simdCount > 0)
                {
                    // The vectorized path must never run on hardware the derived converter does not
                    // support; inputs shorter than one vector skip this check and use ConvertCore only.
                    if (!this.IsAvailable)
                    {
                        throw new InvalidOperationException(
                            "This converter can be used only on architecture having 256 bit floating point SIMD registers!");
                    }

                    this.ConvertCoreVectorized(values.Slice(0, simdCount), result.Slice(0, simdCount));
                }

                // Always invoked, possibly with empty slices when the length is a multiple of the vector size.
                this.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder));
            }

            /// <summary>
            /// Vectorized conversion; called with spans whose length is a multiple of the vector size.
            /// </summary>
            /// <param name="values">The planar component values.</param>
            /// <param name="result">The destination pixel span.</param>
            protected abstract void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result);

            /// <summary>
            /// Non-vectorized conversion; called for the elements left over after the vectorized part.
            /// </summary>
            /// <param name="values">The planar component values.</param>
            /// <param name="result">The destination pixel span.</param>
            protected abstract void ConvertCore(in ComponentValues values, Span<Vector4> result);
        }
    }
}
|||
Loading…
Reference in new issue