mirror of https://github.com/SixLabors/ImageSharp
22 changed files with 624 additions and 578 deletions
@ -1,145 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components; |
|||
|
|||
internal partial struct Block8x8F |
|||
{ |
|||
/// <summary>
|
|||
/// A number of rows of 8 scalar coefficients each in <see cref="Block8x8F"/>
|
|||
/// </summary>
|
|||
public const int RowCount = 8; |
|||
|
|||
[FieldOffset(0)] |
|||
public Vector256<float> V0; |
|||
[FieldOffset(32)] |
|||
public Vector256<float> V1; |
|||
[FieldOffset(64)] |
|||
public Vector256<float> V2; |
|||
[FieldOffset(96)] |
|||
public Vector256<float> V3; |
|||
[FieldOffset(128)] |
|||
public Vector256<float> V4; |
|||
[FieldOffset(160)] |
|||
public Vector256<float> V5; |
|||
[FieldOffset(192)] |
|||
public Vector256<float> V6; |
|||
[FieldOffset(224)] |
|||
public Vector256<float> V7; |
|||
|
|||
/// <summary>
/// Multiplies <paramref name="a"/> by <paramref name="b"/> element-wise, converts the products
/// to 32-bit integers and stores them in <paramref name="dest"/> packed as saturated 16-bit values.
/// </summary>
/// <param name="a">The first factor block.</param>
/// <param name="b">The second factor block.</param>
/// <param name="dest">The block receiving the packed 16-bit results.</param>
private static void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");

    ref Vector256<float> aBase = ref a.V0;
    ref Vector256<float> bBase = ref b.V0;
    ref Vector256<short> destRef = ref dest.V01;

    // The 256-bit pack operates on each 128-bit lane separately, so the two packed rows
    // come out interleaved; this permutation moves the 32-bit chunks back into row order.
    Vector256<int> multiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);

    // Two float rows are consumed per iteration and produce one 16 x int16 destination vector.
    for (nuint i = 0; i < 8; i += 2)
    {
        Vector256<int> row0 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 0), Unsafe.Add(ref bBase, i + 0)));
        Vector256<int> row1 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 1), Unsafe.Add(ref bBase, i + 1)));

        Vector256<short> row = Avx2.PackSignedSaturate(row0, row1);
        row = Avx2.PermuteVar8x32(row.AsInt32(), multiplyIntoInt16ShuffleMask).AsInt16();

        Unsafe.Add(ref destRef, i / 2) = row;
    }
}
|||
|
|||
/// <summary>
/// SSE2 path: multiplies <paramref name="a"/> by <paramref name="b"/> element-wise, converts the
/// products to 32-bit integers and writes them to <paramref name="dest"/> as saturated 16-bit values.
/// </summary>
/// <param name="a">The first factor block.</param>
/// <param name="b">The second factor block.</param>
/// <param name="dest">The block receiving the packed 16-bit results.</param>
private static void MultiplyIntoInt16_Sse2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Sse2.IsSupported, "Sse2 support is required to run this operation!");

    // View both float blocks as 16 four-float vectors and the destination as 8 eight-short vectors.
    ref Vector128<float> lhs = ref Unsafe.As<Block8x8F, Vector128<float>>(ref a);
    ref Vector128<float> rhs = ref Unsafe.As<Block8x8F, Vector128<float>>(ref b);
    ref Vector128<short> output = ref Unsafe.As<Block8x8, Vector128<short>>(ref dest);

    // TODO: We can use the v128 utilities for this.
    for (nuint row = 0; row < 8; row++)
    {
        nuint leftIndex = row * 2;
        nuint rightIndex = leftIndex + 1;

        Vector128<float> leftProduct = Sse.Multiply(Unsafe.Add(ref lhs, leftIndex), Unsafe.Add(ref rhs, leftIndex));
        Vector128<float> rightProduct = Sse.Multiply(Unsafe.Add(ref lhs, rightIndex), Unsafe.Add(ref rhs, rightIndex));

        Vector128<int> leftInts = Sse2.ConvertToVector128Int32(leftProduct);
        Vector128<int> rightInts = Sse2.ConvertToVector128Int32(rightProduct);

        Unsafe.Add(ref output, row) = Sse2.PackSignedSaturate(leftInts, rightInts);
    }
}
|||
|
|||
/// <summary>
/// Transposes the 8x8 block in place using AVX unpack, shuffle and blend operations.
/// </summary>
private void TransposeInplace_Avx()
{
    // https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536

    // Gather phase: every input is read into a local before any field is overwritten.
    // r0..r3 pair the left half of rows 0..3 with the left half of rows 4..7.
    Vector256<float> r0 = Avx.InsertVector128(this.V0, this.V4L.AsVector128(), 1);
    Vector256<float> r1 = Avx.InsertVector128(this.V1, this.V5L.AsVector128(), 1);
    Vector256<float> r2 = Avx.InsertVector128(this.V2, this.V6L.AsVector128(), 1);
    Vector256<float> r3 = Avx.InsertVector128(this.V3, this.V7L.AsVector128(), 1);

    // r4..r7 pair the right half of rows 0..3 with the right half of rows 4..7.
    Vector256<float> r4 = Avx.InsertVector128(this.V0R.AsVector128().ToVector256(), this.V4R.AsVector128(), 1);
    Vector256<float> r5 = Avx.InsertVector128(this.V1R.AsVector128().ToVector256(), this.V5R.AsVector128(), 1);
    Vector256<float> r6 = Avx.InsertVector128(this.V2R.AsVector128().ToVector256(), this.V6R.AsVector128(), 1);
    Vector256<float> r7 = Avx.InsertVector128(this.V3R.AsVector128().ToVector256(), this.V7R.AsVector128(), 1);

    // Interleave neighbouring rows.
    Vector256<float> t0 = Avx.UnpackLow(r0, r1);
    Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
    Vector256<float> t2 = Avx.UnpackLow(r2, r3);
    Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
    Vector256<float> t4 = Avx.UnpackLow(r4, r5);
    Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
    Vector256<float> t6 = Avx.UnpackLow(r6, r7);
    Vector256<float> t7 = Avx.UnpackHigh(r6, r7);

    // Combine each pair of interleaved vectors into two output rows.
    Vector256<float> v02 = Avx.Shuffle(t0, t2, 0x4E);
    this.V0 = Avx.Blend(t0, v02, 0xCC);
    this.V1 = Avx.Blend(t2, v02, 0x33);

    Vector256<float> v13 = Avx.Shuffle(t1, t3, 0x4E);
    this.V2 = Avx.Blend(t1, v13, 0xCC);
    this.V3 = Avx.Blend(t3, v13, 0x33);

    Vector256<float> v46 = Avx.Shuffle(t4, t6, 0x4E);
    this.V4 = Avx.Blend(t4, v46, 0xCC);
    this.V5 = Avx.Blend(t6, v46, 0x33);

    Vector256<float> v57 = Avx.Shuffle(t5, t7, 0x4E);
    this.V6 = Avx.Blend(t5, v57, 0xCC);
    this.V7 = Avx.Blend(t7, v57, 0x33);
}
|||
} |
|||
@ -1,183 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.Intrinsics; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components; |
|||
|
|||
internal partial struct Block8x8F |
|||
{ |
|||
/// <summary>
/// Shifts every coefficient up by ceil(<paramref name="maximum"/> / 2) and clamps the result to [0, <paramref name="maximum"/>].
/// </summary>
/// <param name="maximum">The maximum value to normalize to.</param>
public void NormalizeColorsInPlace(float maximum)
{
    Vector4 lower = Vector4.Zero;
    Vector4 upper = new(maximum);
    Vector4 shift = new(MathF.Ceiling(maximum * 0.5F));

    ShiftAndClamp(ref this.V0L, shift, lower, upper);
    ShiftAndClamp(ref this.V0R, shift, lower, upper);
    ShiftAndClamp(ref this.V1L, shift, lower, upper);
    ShiftAndClamp(ref this.V1R, shift, lower, upper);
    ShiftAndClamp(ref this.V2L, shift, lower, upper);
    ShiftAndClamp(ref this.V2R, shift, lower, upper);
    ShiftAndClamp(ref this.V3L, shift, lower, upper);
    ShiftAndClamp(ref this.V3R, shift, lower, upper);
    ShiftAndClamp(ref this.V4L, shift, lower, upper);
    ShiftAndClamp(ref this.V4R, shift, lower, upper);
    ShiftAndClamp(ref this.V5L, shift, lower, upper);
    ShiftAndClamp(ref this.V5R, shift, lower, upper);
    ShiftAndClamp(ref this.V6L, shift, lower, upper);
    ShiftAndClamp(ref this.V6R, shift, lower, upper);
    ShiftAndClamp(ref this.V7L, shift, lower, upper);
    ShiftAndClamp(ref this.V7R, shift, lower, upper);

    // Applies the level shift and clamp to a single half-row.
    static void ShiftAndClamp(ref Vector4 halfRow, Vector4 shift, Vector4 lower, Vector4 upper)
        => halfRow = Vector4.Clamp(halfRow + shift, lower, upper);
}
|||
|
|||
/// <summary>
/// <see cref="Vector256{Single}"/> version of <see cref="NormalizeColorsInPlace(float)"/> and <see cref="RoundInPlace()"/>.
/// </summary>
/// <param name="maximum">The maximum value to normalize to.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void NormalizeColorsAndRoundInPlaceVector256(float maximum)
{
    Vector256<float> shift = Vector256.Create(MathF.Ceiling(maximum * 0.5F));
    Vector256<float> upper = Vector256.Create(maximum);

    NormalizeRow(ref this.V0L, shift, upper);
    NormalizeRow(ref this.V1L, shift, upper);
    NormalizeRow(ref this.V2L, shift, upper);
    NormalizeRow(ref this.V3L, shift, upper);
    NormalizeRow(ref this.V4L, shift, upper);
    NormalizeRow(ref this.V5L, shift, upper);
    NormalizeRow(ref this.V6L, shift, upper);
    NormalizeRow(ref this.V7L, shift, upper);

    // Reinterprets the 256 bits starting at the left half-row as one full row and normalizes it in place.
    static void NormalizeRow(ref Vector4 rowStart, Vector256<float> shift, Vector256<float> upper)
    {
        ref Vector256<float> row = ref Unsafe.As<Vector4, Vector256<float>>(ref rowStart);
        row = NormalizeAndRoundVector256(row, shift, upper);
    }
}
|||
|
|||
/// <summary>
/// <see cref="Vector128{Single}"/> version of <see cref="NormalizeColorsInPlace(float)"/> and <see cref="RoundInPlace()"/>.
/// </summary>
/// <param name="maximum">The maximum value to normalize to.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void NormalizeColorsAndRoundInPlaceVector128(float maximum)
{
    Vector128<float> shift = Vector128.Create(MathF.Ceiling(maximum * 0.5F));
    Vector128<float> upper = Vector128.Create(maximum);

    NormalizeHalfRow(ref this.V0L, shift, upper);
    NormalizeHalfRow(ref this.V0R, shift, upper);
    NormalizeHalfRow(ref this.V1L, shift, upper);
    NormalizeHalfRow(ref this.V1R, shift, upper);
    NormalizeHalfRow(ref this.V2L, shift, upper);
    NormalizeHalfRow(ref this.V2R, shift, upper);
    NormalizeHalfRow(ref this.V3L, shift, upper);
    NormalizeHalfRow(ref this.V3R, shift, upper);
    NormalizeHalfRow(ref this.V4L, shift, upper);
    NormalizeHalfRow(ref this.V4R, shift, upper);
    NormalizeHalfRow(ref this.V5L, shift, upper);
    NormalizeHalfRow(ref this.V5R, shift, upper);
    NormalizeHalfRow(ref this.V6L, shift, upper);
    NormalizeHalfRow(ref this.V6R, shift, upper);
    NormalizeHalfRow(ref this.V7L, shift, upper);
    NormalizeHalfRow(ref this.V7R, shift, upper);

    // Normalizes and rounds one four-float half-row in place.
    static void NormalizeHalfRow(ref Vector4 halfRow, Vector128<float> shift, Vector128<float> upper)
        => halfRow = NormalizeAndRoundVector128(halfRow.AsVector128(), shift, upper).AsVector4();
}
|||
|
|||
/// <summary>
/// Fills this block from <paramref name="source"/>, converting each 16-bit value to a float one element at a time.
/// </summary>
/// <param name="source">The source block</param>
public void LoadFromInt16Scalar(ref Block8x8 source)
{
    ref short selfRef = ref Unsafe.As<Block8x8, short>(ref source);

    // Each row holds 8 consecutive shorts of the source block.
    LoadRow(ref this.V0L, ref this.V0R, ref Unsafe.Add(ref selfRef, 0));
    LoadRow(ref this.V1L, ref this.V1R, ref Unsafe.Add(ref selfRef, 8));
    LoadRow(ref this.V2L, ref this.V2R, ref Unsafe.Add(ref selfRef, 16));
    LoadRow(ref this.V3L, ref this.V3R, ref Unsafe.Add(ref selfRef, 24));
    LoadRow(ref this.V4L, ref this.V4R, ref Unsafe.Add(ref selfRef, 32));
    LoadRow(ref this.V5L, ref this.V5R, ref Unsafe.Add(ref selfRef, 40));
    LoadRow(ref this.V6L, ref this.V6R, ref Unsafe.Add(ref selfRef, 48));
    LoadRow(ref this.V7L, ref this.V7R, ref Unsafe.Add(ref selfRef, 56));

    // Copies 8 shorts starting at rowStart into the left (first 4) and right (last 4) half-rows.
    static void LoadRow(ref Vector4 left, ref Vector4 right, ref short rowStart)
    {
        left.X = Unsafe.Add(ref rowStart, 0);
        left.Y = Unsafe.Add(ref rowStart, 1);
        left.Z = Unsafe.Add(ref rowStart, 2);
        left.W = Unsafe.Add(ref rowStart, 3);
        right.X = Unsafe.Add(ref rowStart, 4);
        right.Y = Unsafe.Add(ref rowStart, 5);
        right.Z = Unsafe.Add(ref rowStart, 6);
        right.W = Unsafe.Add(ref rowStart, 7);
    }
}
|||
} |
|||
@ -0,0 +1,66 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using SixLabors.ImageSharp.Common.Helpers; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components; |
|||
|
|||
/// <content>
|
|||
/// <see cref="Vector128{Single}"/> version of <see cref="Block8x8F"/>.
|
|||
/// </content>
|
|||
internal partial struct Block8x8F |
|||
{ |
|||
/// <summary>
/// <see cref="Vector128{Single}"/> version of <see cref="NormalizeColorsInPlace(float)"/> and <see cref="RoundInPlace()"/>.
/// </summary>
/// <param name="maximum">The maximum value to normalize to.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void NormalizeColorsAndRoundInPlaceVector128(float maximum)
{
    Vector128<float> shift = Vector128.Create(MathF.Ceiling(maximum * 0.5F));
    Vector128<float> upper = Vector128.Create(maximum);

    NormalizeHalfRow(ref this.V0L, shift, upper);
    NormalizeHalfRow(ref this.V0R, shift, upper);
    NormalizeHalfRow(ref this.V1L, shift, upper);
    NormalizeHalfRow(ref this.V1R, shift, upper);
    NormalizeHalfRow(ref this.V2L, shift, upper);
    NormalizeHalfRow(ref this.V2R, shift, upper);
    NormalizeHalfRow(ref this.V3L, shift, upper);
    NormalizeHalfRow(ref this.V3R, shift, upper);
    NormalizeHalfRow(ref this.V4L, shift, upper);
    NormalizeHalfRow(ref this.V4R, shift, upper);
    NormalizeHalfRow(ref this.V5L, shift, upper);
    NormalizeHalfRow(ref this.V5R, shift, upper);
    NormalizeHalfRow(ref this.V6L, shift, upper);
    NormalizeHalfRow(ref this.V6R, shift, upper);
    NormalizeHalfRow(ref this.V7L, shift, upper);
    NormalizeHalfRow(ref this.V7R, shift, upper);

    // Normalizes and rounds one four-float half-row in place.
    static void NormalizeHalfRow(ref Vector4 halfRow, Vector128<float> shift, Vector128<float> upper)
        => halfRow = NormalizeAndRoundVector128(halfRow.AsVector128(), shift, upper).AsVector4();
}
|||
|
|||
/// <summary>
/// Adds <paramref name="off"/> to <paramref name="value"/>, clamps the sum between zero and
/// <paramref name="max"/>, then rounds it with <c>Vector128_.RoundToNearestInteger</c>.
/// </summary>
/// <param name="value">The values to normalize.</param>
/// <param name="off">The offset added to every element.</param>
/// <param name="max">The upper clamp bound.</param>
/// <returns>The shifted, clamped and rounded vector.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector128<float> NormalizeAndRoundVector128(Vector128<float> value, Vector128<float> off, Vector128<float> max)
{
    Vector128<float> shifted = value + off;
    Vector128<float> clamped = Vector128_.Clamp(shifted, Vector128<float>.Zero, max);
    return Vector128_.RoundToNearestInteger(clamped);
}
|||
|
|||
/// <summary>
/// SSE2 path: multiplies <paramref name="a"/> by <paramref name="b"/> element-wise, converts the
/// products to 32-bit integers and writes them to <paramref name="dest"/> as saturated 16-bit values.
/// </summary>
/// <param name="a">The first factor block.</param>
/// <param name="b">The second factor block.</param>
/// <param name="dest">The block receiving the packed 16-bit results.</param>
private static void MultiplyIntoInt16_Sse2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Sse2.IsSupported, "Sse2 support is required to run this operation!");

    // View both float blocks as 16 four-float vectors and the destination as 8 eight-short vectors.
    ref Vector128<float> lhs = ref Unsafe.As<Block8x8F, Vector128<float>>(ref a);
    ref Vector128<float> rhs = ref Unsafe.As<Block8x8F, Vector128<float>>(ref b);
    ref Vector128<short> output = ref Unsafe.As<Block8x8, Vector128<short>>(ref dest);

    // TODO: We can use the v128 utilities for this.
    for (nuint row = 0; row < 8; row++)
    {
        nuint leftIndex = row * 2;
        nuint rightIndex = leftIndex + 1;

        Vector128<float> leftProduct = Sse.Multiply(Unsafe.Add(ref lhs, leftIndex), Unsafe.Add(ref rhs, leftIndex));
        Vector128<float> rightProduct = Sse.Multiply(Unsafe.Add(ref lhs, rightIndex), Unsafe.Add(ref rhs, rightIndex));

        Vector128<int> leftInts = Sse2.ConvertToVector128Int32(leftProduct);
        Vector128<int> rightInts = Sse2.ConvertToVector128Int32(rightProduct);

        Unsafe.Add(ref output, row) = Sse2.PackSignedSaturate(leftInts, rightInts);
    }
}
|||
} |
|||
@ -0,0 +1,191 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Six Labors Split License.
|
|||
|
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
using SixLabors.ImageSharp.Common.Helpers; |
|||
|
|||
namespace SixLabors.ImageSharp.Formats.Jpeg.Components; |
|||
|
|||
/// <content>
|
|||
/// <see cref="Vector256{Single}"/> version of <see cref="Block8x8F"/>.
|
|||
/// </content>
|
|||
internal partial struct Block8x8F |
|||
{ |
|||
/// <summary>
|
|||
/// A number of rows of 8 scalar coefficients each in <see cref="Block8x8F"/>
|
|||
/// </summary>
|
|||
public const int RowCount = 8; |
|||
|
|||
#pragma warning disable SA1310 // Field names should not contain underscore
|
|||
[FieldOffset(0)] |
|||
public Vector256<float> V256_0; |
|||
[FieldOffset(32)] |
|||
public Vector256<float> V256_1; |
|||
[FieldOffset(64)] |
|||
public Vector256<float> V256_2; |
|||
[FieldOffset(96)] |
|||
public Vector256<float> V256_3; |
|||
[FieldOffset(128)] |
|||
public Vector256<float> V256_4; |
|||
[FieldOffset(160)] |
|||
public Vector256<float> V256_5; |
|||
[FieldOffset(192)] |
|||
public Vector256<float> V256_6; |
|||
[FieldOffset(224)] |
|||
public Vector256<float> V256_7; |
|||
#pragma warning restore SA1310 // Field names should not contain underscore
|
|||
|
|||
/// <summary>
/// <see cref="Vector256{Single}"/> version of <see cref="NormalizeColorsInPlace(float)"/> and <see cref="RoundInPlace()"/>.
/// </summary>
/// <param name="maximum">The maximum value to normalize to.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void NormalizeColorsAndRoundInPlaceVector256(float maximum)
{
    Vector256<float> shift = Vector256.Create(MathF.Ceiling(maximum * 0.5F));
    Vector256<float> upper = Vector256.Create(maximum);

    // The eight row fields sit at consecutive 32-byte offsets, so they can be walked from the first one.
    ref Vector256<float> rows = ref this.V256_0;
    for (nuint i = 0; i < 8; i++)
    {
        ref Vector256<float> row = ref Unsafe.Add(ref rows, i);
        row = NormalizeAndRoundVector256(row, shift, upper);
    }
}
|||
|
|||
/// <summary>
/// Loads values from <paramref name="source"/> using extended AVX2 intrinsics,
/// converting every 16-bit value to a float.
/// </summary>
/// <param name="source">The source <see cref="Block8x8"/></param>
public void LoadFromInt16ExtendedAvx2(ref Block8x8 source)
{
    DebugGuard.IsTrue(
        Avx2.IsSupported,
        "LoadFromInt16ExtendedAvx2 only works on AVX2 compatible architecture!");

    ref short sRef = ref Unsafe.As<Block8x8, short>(ref source);
    ref Vector256<float> dRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref this);

    // Each 128-bit load reads 8 shorts (one block row); the offset passed to LoadUnsafe is
    // counted in elements. A row is widened to 8 ints and then converted to 8 floats.
    // Rows are handled two at a time (top/bottom).
    Vector256<int> top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef));
    Vector256<int> bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)Vector256<int>.Count));
    dRef = Avx.ConvertToVector256Single(top);
    Unsafe.Add(ref dRef, 1) = Avx.ConvertToVector256Single(bottom);

    top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 2)));
    bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 3)));
    Unsafe.Add(ref dRef, 2) = Avx.ConvertToVector256Single(top);
    Unsafe.Add(ref dRef, 3) = Avx.ConvertToVector256Single(bottom);

    top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 4)));
    bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 5)));
    Unsafe.Add(ref dRef, 4) = Avx.ConvertToVector256Single(top);
    Unsafe.Add(ref dRef, 5) = Avx.ConvertToVector256Single(bottom);

    top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 6)));
    bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 7)));
    Unsafe.Add(ref dRef, 6) = Avx.ConvertToVector256Single(top);
    Unsafe.Add(ref dRef, 7) = Avx.ConvertToVector256Single(bottom);
}
|||
|
|||
/// <summary>
/// Adds <paramref name="off"/> to <paramref name="value"/>, clamps the sum between zero and
/// <paramref name="max"/>, then rounds it with <c>Vector256_.RoundToNearestInteger</c>.
/// </summary>
/// <param name="value">The values to normalize.</param>
/// <param name="off">The offset added to every element.</param>
/// <param name="max">The upper clamp bound.</param>
/// <returns>The shifted, clamped and rounded vector.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector256<float> NormalizeAndRoundVector256(Vector256<float> value, Vector256<float> off, Vector256<float> max)
{
    Vector256<float> shifted = value + off;
    Vector256<float> clamped = Vector256_.Clamp(shifted, Vector256<float>.Zero, max);
    return Vector256_.RoundToNearestInteger(clamped);
}
|||
|
|||
/// <summary>
/// Multiplies <paramref name="a"/> by <paramref name="b"/> element-wise, converts the products
/// to 32-bit integers and stores them in <paramref name="dest"/> packed as saturated 16-bit values.
/// </summary>
/// <param name="a">The first factor block.</param>
/// <param name="b">The second factor block.</param>
/// <param name="dest">The block receiving the packed 16-bit results.</param>
private static void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");

    ref Vector256<float> aBase = ref a.V256_0;
    ref Vector256<float> bBase = ref b.V256_0;
    ref Vector256<short> destRef = ref dest.V01;

    // The 256-bit pack operates on each 128-bit lane separately, so the two packed rows
    // come out interleaved; this permutation moves the 32-bit chunks back into row order.
    Vector256<int> multiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);

    // Two float rows are consumed per iteration and produce one 16 x int16 destination vector.
    for (nuint i = 0; i < 8; i += 2)
    {
        Vector256<int> row0 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 0), Unsafe.Add(ref bBase, i + 0)));
        Vector256<int> row1 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 1), Unsafe.Add(ref bBase, i + 1)));

        Vector256<short> row = Avx2.PackSignedSaturate(row0, row1);
        row = Avx2.PermuteVar8x32(row.AsInt32(), multiplyIntoInt16ShuffleMask).AsInt16();

        Unsafe.Add(ref destRef, i / 2) = row;
    }
}
|||
|
|||
/// <summary>
/// Transposes the 8x8 block in place using AVX unpack, shuffle and blend operations.
/// </summary>
private void TransposeInplace_Avx()
{
    // https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536

    // Gather phase: every input is read into a local before any field is overwritten.
    // r0..r3 pair the left half of rows 0..3 with the left half of rows 4..7.
    Vector256<float> r0 = Avx.InsertVector128(this.V256_0, this.V4L.AsVector128(), 1);
    Vector256<float> r1 = Avx.InsertVector128(this.V256_1, this.V5L.AsVector128(), 1);
    Vector256<float> r2 = Avx.InsertVector128(this.V256_2, this.V6L.AsVector128(), 1);
    Vector256<float> r3 = Avx.InsertVector128(this.V256_3, this.V7L.AsVector128(), 1);

    // r4..r7 pair the right half of rows 0..3 with the right half of rows 4..7.
    Vector256<float> r4 = Avx.InsertVector128(this.V0R.AsVector128().ToVector256(), this.V4R.AsVector128(), 1);
    Vector256<float> r5 = Avx.InsertVector128(this.V1R.AsVector128().ToVector256(), this.V5R.AsVector128(), 1);
    Vector256<float> r6 = Avx.InsertVector128(this.V2R.AsVector128().ToVector256(), this.V6R.AsVector128(), 1);
    Vector256<float> r7 = Avx.InsertVector128(this.V3R.AsVector128().ToVector256(), this.V7R.AsVector128(), 1);

    // Interleave neighbouring rows.
    Vector256<float> t0 = Avx.UnpackLow(r0, r1);
    Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
    Vector256<float> t2 = Avx.UnpackLow(r2, r3);
    Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
    Vector256<float> t4 = Avx.UnpackLow(r4, r5);
    Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
    Vector256<float> t6 = Avx.UnpackLow(r6, r7);
    Vector256<float> t7 = Avx.UnpackHigh(r6, r7);

    // Combine each pair of interleaved vectors into two output rows.
    Vector256<float> v02 = Avx.Shuffle(t0, t2, 0x4E);
    this.V256_0 = Avx.Blend(t0, v02, 0xCC);
    this.V256_1 = Avx.Blend(t2, v02, 0x33);

    Vector256<float> v13 = Avx.Shuffle(t1, t3, 0x4E);
    this.V256_2 = Avx.Blend(t1, v13, 0xCC);
    this.V256_3 = Avx.Blend(t3, v13, 0x33);

    Vector256<float> v46 = Avx.Shuffle(t4, t6, 0x4E);
    this.V256_4 = Avx.Blend(t4, v46, 0xCC);
    this.V256_5 = Avx.Blend(t6, v46, 0x33);

    Vector256<float> v57 = Avx.Shuffle(t5, t7, 0x4E);
    this.V256_6 = Avx.Blend(t5, v57, 0xCC);
    this.V256_7 = Avx.Blend(t7, v57, 0x33);
}
|||
} |
|||
Loading…
Reference in new issue