@ -16,9 +16,9 @@ using System.Text;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
/// <summary>
/// Represents a Jpeg block with <see cref="float"/> coefficients.
/// 8x8 matrix of <see cref="float"/> coefficients.
/// </summary>
[StructLayout(LayoutKind.Sequential )]
[StructLayout(LayoutKind.Explicit )]
internal partial struct Block8x8F : IEquatable < Block8x8F >
{
/// <summary>
@ -27,28 +27,44 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public const int Size = 6 4 ;
#pragma warning disable SA1600 // ElementsMustBeDocumented
// Explicitly placed storage: sixteen Vector4 fields at consecutive 16-byte offsets (0..240),
// i.e. 16 * 4 = 64 floats in total, which equals Size.
// Field names follow V{n}{L|R}; by offset, V{n}L sits directly before V{n}R, so each index n
// covers 8 consecutive floats (presumably the left and right half of row n -- TODO confirm).
[FieldOffset(0)]
public Vector4 V0L ;
[FieldOffset(16)]
public Vector4 V0R ;
[FieldOffset(32)]
public Vector4 V1L ;
[FieldOffset(48)]
public Vector4 V1R ;
[FieldOffset(64)]
public Vector4 V2L ;
[FieldOffset(80)]
public Vector4 V2R ;
[FieldOffset(96)]
public Vector4 V3L ;
[FieldOffset(112)]
public Vector4 V3R ;
[FieldOffset(128)]
public Vector4 V4L ;
[FieldOffset(144)]
public Vector4 V4R ;
[FieldOffset(160)]
public Vector4 V5L ;
[FieldOffset(176)]
public Vector4 V5R ;
[FieldOffset(192)]
public Vector4 V6L ;
[FieldOffset(208)]
public Vector4 V6R ;
[FieldOffset(224)]
public Vector4 V7L ;
[FieldOffset(240)]
public Vector4 V7R ;
#pragma warning restore SA1600 // ElementsMustBeDocumented
@ -62,17 +78,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
GuardBlockIndex ( idx ) ;
DebugGuard . MustBeBetweenOrEqualTo ( idx , 0 , Size - 1 , nameof ( idx ) ) ;
ref float selfRef = ref Unsafe . As < Block8x8F , float > ( ref this ) ;
return Unsafe . Add ( ref selfRef , idx ) ;
return Unsafe . Add ( ref selfRef , ( nint ) ( uint ) idx ) ;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
set
{
GuardBlockIndex ( idx ) ;
DebugGuard . MustBeBetweenOrEqualTo ( idx , 0 , Size - 1 , nameof ( idx ) ) ;
ref float selfRef = ref Unsafe . As < Block8x8F , float > ( ref this ) ;
Unsafe . Add ( ref selfRef , idx ) = value ;
Unsafe . Add ( ref selfRef , ( nint ) ( uint ) idx ) = value ;
}
}
@ -148,13 +164,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
return result ;
}
/// <summary>
/// Resets every coefficient of this block to its default value (zero).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public void Clear()
{
    // Overwriting the whole struct with its default is the cheapest way to do this in C#.
    this = default;
}
/// <summary>
/// Load raw 32bit floating point data from source.
/// </summary>
@ -262,7 +271,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public float[] ToArray()
{
    // One buffer with a slot for each of the block's Size coefficients.
    // It is declared exactly once: a second local named `result` in this scope does not compile.
    float[] result = new float[Size];

    // Hand the buffer to ScaledCopyTo, then return it to the caller.
    this.ScaledCopyTo(result);
    return result;
}
@ -278,14 +287,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
if ( Avx . IsSupported )
{
var valueVec = Vector256 . Create ( value ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V0L ) = Avx . Multiply ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V0L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V1L ) = Avx . Multiply ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V1L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V2L ) = Avx . Multiply ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V2L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V3L ) = Avx . Multiply ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V3L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V4L ) = Avx . Multiply ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V4L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V5L ) = Avx . Multiply ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V5L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V6L ) = Avx . Multiply ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V6L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V7L ) = Avx . Multiply ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V7L ) , valueVec ) ;
this . V0 = Avx . Multiply ( this . V0 , valueVec ) ;
this . V1 = Avx . Multiply ( this . V1 , valueVec ) ;
this . V2 = Avx . Multiply ( this . V2 , valueVec ) ;
this . V3 = Avx . Multiply ( this . V3 , valueVec ) ;
this . V4 = Avx . Multiply ( this . V4 , valueVec ) ;
this . V5 = Avx . Multiply ( this . V5 , valueVec ) ;
this . V6 = Avx . Multiply ( this . V6 , valueVec ) ;
this . V7 = Avx . Multiply ( this . V7 , valueVec ) ;
}
else
#endif
@ -319,45 +328,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
#if SUPPORTS_RUNTIME_INTRINSICS
if ( Avx . IsSupported )
{
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V0L )
= Avx . Multiply (
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V0L ) ,
Unsafe . As < Vector4 , Vector256 < float > > ( ref other . V0L ) ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V1L )
= Avx . Multiply (
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V1L ) ,
Unsafe . As < Vector4 , Vector256 < float > > ( ref other . V1L ) ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V2L )
= Avx . Multiply (
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V2L ) ,
Unsafe . As < Vector4 , Vector256 < float > > ( ref other . V2L ) ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V3L )
= Avx . Multiply (
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V3L ) ,
Unsafe . As < Vector4 , Vector256 < float > > ( ref other . V3L ) ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V4L )
= Avx . Multiply (
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V4L ) ,
Unsafe . As < Vector4 , Vector256 < float > > ( ref other . V4L ) ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V5L )
= Avx . Multiply (
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V5L ) ,
Unsafe . As < Vector4 , Vector256 < float > > ( ref other . V5L ) ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V6L )
= Avx . Multiply (
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V6L ) ,
Unsafe . As < Vector4 , Vector256 < float > > ( ref other . V6L ) ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V7L )
= Avx . Multiply (
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V7L ) ,
Unsafe . As < Vector4 , Vector256 < float > > ( ref other . V7L ) ) ;
this . V0 = Avx . Multiply ( this . V0 , other . V0 ) ;
this . V1 = Avx . Multiply ( this . V1 , other . V1 ) ;
this . V2 = Avx . Multiply ( this . V2 , other . V2 ) ;
this . V3 = Avx . Multiply ( this . V3 , other . V3 ) ;
this . V4 = Avx . Multiply ( this . V4 , other . V4 ) ;
this . V5 = Avx . Multiply ( this . V5 , other . V5 ) ;
this . V6 = Avx . Multiply ( this . V6 , other . V6 ) ;
this . V7 = Avx . Multiply ( this . V7 , other . V7 ) ;
}
else
#endif
@ -392,14 +370,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
if ( Avx . IsSupported )
{
var valueVec = Vector256 . Create ( value ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V0L ) = Avx . Add ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V0L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V1L ) = Avx . Add ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V1L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V2L ) = Avx . Add ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V2L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V3L ) = Avx . Add ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V3L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V4L ) = Avx . Add ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V4L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V5L ) = Avx . Add ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V5L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V6L ) = Avx . Add ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V6L ) , valueVec ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V7L ) = Avx . Add ( Unsafe . As < Vector4 , Vector256 < float > > ( ref this . V7L ) , valueVec ) ;
this . V0 = Avx . Add ( this . V0 , valueVec ) ;
this . V1 = Avx . Add ( this . V1 , valueVec ) ;
this . V2 = Avx . Add ( this . V2 , valueVec ) ;
this . V3 = Avx . Add ( this . V3 , valueVec ) ;
this . V4 = Avx . Add ( this . V4 , valueVec ) ;
this . V5 = Avx . Add ( this . V5 , valueVec ) ;
this . V6 = Avx . Add ( this . V6 , valueVec ) ;
this . V7 = Avx . Add ( this . V7 , valueVec ) ;
}
else
#endif
@ -425,183 +403,37 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
/// <summary>
/// Dequantize the block in place: each coefficient selected through the unzig table
/// is multiplied by the matching quantization table entry.
/// </summary>
/// <param name="blockPtr">The block pointer.</param>
/// <param name="qtPtr">The qt pointer.</param>
/// <param name="unzigPtr">Unzig pointer</param>
public static unsafe void DequantizeBlock(Block8x8F* blockPtr, Block8x8F* qtPtr, byte* unzigPtr)
{
    float* coefficients = (float*)blockPtr;
    float* table = (float*)qtPtr;

    int entry = 0;
    while (entry < Size)
    {
        // unzigPtr[entry] names the coefficient that is scaled by table entry `entry`.
        coefficients[unzigPtr[entry]] *= table[entry];
        entry++;
    }
}
/// <summary>
/// Quantizes <paramref name="block"/> into <paramref name="dest"/> using the <paramref name="qt"/> quantization table.
/// The coefficients are first copied into <paramref name="dest"/> in the order given by <paramref name="unZig"/>,
/// then divided by the table entries and "pre-rounded"; an (int)-cast of each value finishes the rounding.
/// </summary>
/// <param name="block">Source block</param>
/// <param name="dest">Destination block</param>
/// <param name="qt">The quantization table</param>
/// <param name="unZig">The 8x8 Unzig block.</param>
public static unsafe void Quantize(
    ref Block8x8F block,
    ref Block8x8F dest,
    ref Block8x8F qt,
    ref ZigZag unZig)
{
    int position = 0;
    while (position < Size)
    {
        // Look up which source coefficient belongs at this position.
        int sourceIndex = unZig[position];
        dest[position] = block[sourceIndex];
        position++;
    }

    // Divide by the table and apply the rounding offset, in place on dest.
    DivideRoundAll(ref dest, ref qt);
}
// NOTE(review): this span carries lines of two definitions (Scale16X16To8X8 with its vectorized helper,
// and a three-argument Quantize); confirm which version is intended before relying on it.
/// <summary>
/// Scales the 16x16 region represented by the 4 source blocks to the 8x8 DST block.
/// Quantize input block, apply zig-zag ordering and store result as 16bit integers.
/// </summary>
/// <param name="destination">The destination block.</param>
/// <param name="source">The source block.</param>
public static unsafe void Scale16X16To8X8 ( ref Block8x8F destination , ReadOnlySpan < Block8x8F > source )
/// <param name="block">Source block.</param>
/// <param name="dest">Destination block.</param>
/// <param name="qt">The quantization table.</param>
public static void Quantize ( ref Block8x8F block , ref Block8x8 dest , ref Block8x8F qt )
{
#if SUPPORTS_RUNTIME_INTRINSICS
// Hardware accelerated path, taken only when AVX2 is available at runtime.
if ( Avx2 . IsSupported )
{
Scale16X16To8X8Vectorized ( ref destination , source ) ;
return ;
MultiplyIntoInt16_Avx2 ( ref block , ref qt , ref dest ) ;
ZigZag . ApplyZigZagOrderingAvx2 ( ref dest ) ;
}
#endif
// Scalar fallback for the scaling routine.
Scale16X16To8X8Scalar ( ref destination , source ) ;
}
private static void Scale16X16To8X8Vectorized ( ref Block8x8F destination , ReadOnlySpan < Block8x8F > source )
{
#if SUPPORTS_RUNTIME_INTRINSICS
Debug . Assert ( Avx2 . IsSupported , "AVX2 is required to execute this method" ) ;
// Constants applied to every sum: +2, then * 0.25.
var f2 = Vector256 . Create ( 2f ) ;
var f025 = Vector256 . Create ( 0.25f ) ;
// Permutation mask read from SimdUtils; applied to each result before it is stored.
Vector256 < int > switchInnerDoubleWords = Unsafe . As < byte , Vector256 < int > > ( ref MemoryMarshal . GetReference ( SimdUtils . HwIntrinsics . PermuteMaskSwitchInnerDWords8x32 ) ) ;
// Destination viewed as consecutive 256-bit vectors; the reference is advanced after each store.
ref Vector256 < float > destRef = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref destination ) ;
// Two passes: pass i reads source blocks 2*i and 2*i+1.
for ( int i = 0 ; i < 2 ; i + + )
else if ( Ssse3 . IsSupported )
{
ref Vector256 < float > in1 = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref Unsafe . Add ( ref MemoryMarshal . GetReference ( source ) , 2 * i ) ) ;
ref Vector256 < float > in2 = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref Unsafe . Add ( ref MemoryMarshal . GetReference ( source ) , ( 2 * i ) + 1 ) ) ;
// Each iteration consumes two 256-bit vectors (j and j + 1) from both source blocks.
for ( int j = 0 ; j < 8 ; j + = 2 )
{
Vector256 < float > a = Unsafe . Add ( ref in1 , j ) ;
Vector256 < float > b = Unsafe . Add ( ref in1 , j + 1 ) ;
Vector256 < float > c = Unsafe . Add ( ref in2 , j ) ;
Vector256 < float > d = Unsafe . Add ( ref in2 , j + 1 ) ;
// Per 128-bit lane, control 0b10_00_10_00 picks elements 0 and 2 of each operand,
// control 0b11_01_11_01 picks elements 1 and 3.
Vector256 < float > calc1 = Avx . Shuffle ( a , c , 0 b10_00_10_00 ) ;
Vector256 < float > calc2 = Avx . Shuffle ( a , c , 0 b11_01_11_01 ) ;
Vector256 < float > calc3 = Avx . Shuffle ( b , d , 0 b10_00_10_00 ) ;
Vector256 < float > calc4 = Avx . Shuffle ( b , d , 0 b11_01_11_01 ) ;
// (calc1 + calc2 + calc3 + calc4 + 2) * 0.25
Vector256 < float > sum = Avx . Add ( Avx . Add ( calc1 , calc2 ) , Avx . Add ( calc3 , calc4 ) ) ;
Vector256 < float > add = Avx . Add ( sum , f2 ) ;
Vector256 < float > res = Avx . Multiply ( add , f025 ) ;
// Store the permuted result and step to the next destination vector.
destRef = Avx2 . PermuteVar8x32 ( res , switchInnerDoubleWords ) ;
destRef = ref Unsafe . Add ( ref destRef , 1 ) ;
}
MultiplyIntoInt16_Sse2 ( ref block , ref qt , ref dest ) ;
ZigZag . ApplyZigZagOrderingSsse3 ( ref dest ) ;
}
else
#endif
}
/// <summary>
/// Scalar scaling routine: condenses four source blocks into one destination block.
/// Every destination value is (sum of four neighbouring source values + 2) * 0.25.
/// </summary>
/// <param name="destination">The destination block.</param>
/// <param name="source">The source blocks; elements 0..3 are read.</param>
private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
{
    for (int blockIndex = 0; blockIndex < 4; blockIndex++)
    {
        // Start offset inside the destination for this source block: 0, 4, 32, 36.
        int quadrant = ((blockIndex & 2) << 4) | ((blockIndex & 1) << 2);

        // Work on a local copy of the source block.
        Block8x8F current = source[blockIndex];

        for (int row = 0; row < 4; row++)
        {
            for (int column = 0; column < 4; column++)
            {
                // First of the four source values: two source rows down per row, two elements across per column.
                int first = (16 * row) + (2 * column);
                float total = current[first] + current[first + 1] + current[first + 8] + current[first + 9];

                destination[quadrant + (8 * row) + column] = (total + 2F) * 0.25F;
            }
        }
    }
}
// Divides every element of a by the matching element of b, in place on a, and adds
// 0.5 * clamp(a, -1, 1) to each quotient (vectorized with AVX when available).
// NOTE(review): the `for` loop lines inside the AVX branch (index lookup through ZigZag.ZigZagOrder,
// writes to `dest`) reference names this method does not declare; they appear to belong to a
// different definition -- confirm before relying on this span.
[MethodImpl(InliningOptions.ShortMethod)]
private static void DivideRoundAll ( ref Block8x8F a , ref Block8x8F b )
{
#if SUPPORTS_RUNTIME_INTRINSICS
if ( Avx . IsSupported )
{
// Clamp bounds (-1, 1) and the 0.5 factor used to build the rounding offset.
var vnegOne = Vector256 . Create ( - 1f ) ;
var vadd = Vector256 . Create ( . 5F ) ;
var vone = Vector256 . Create ( 1f ) ;
// Both blocks viewed as 256-bit vectors; aEnd lies 8 vectors past the start of a.
ref Vector256 < float > aBase = ref Unsafe . AsRef ( Unsafe . As < Vector4 , Vector256 < float > > ( ref a . V0L ) ) ;
ref Vector256 < float > bBase = ref Unsafe . AsRef ( Unsafe . As < Vector4 , Vector256 < float > > ( ref b . V0L ) ) ;
ref Vector256 < float > aEnd = ref Unsafe . Add ( ref aBase , 8 ) ;
do
for ( int i = 0 ; i < Size ; i + + )
{
// voff = clamp(a, -1, 1) * 0.5
Vector256 < float > voff = Avx . Multiply ( Avx . Min ( Avx . Max ( vnegOne , aBase ) , vone ) , vadd ) ;
// a = a / b + voff, then advance both references by one vector.
Unsafe . Add ( ref aBase , 0 ) = Avx . Add ( Avx . Divide ( aBase , bBase ) , voff ) ;
aBase = ref Unsafe . Add ( ref aBase , 1 ) ;
bBase = ref Unsafe . Add ( ref bBase , 1 ) ;
// Multiply the coefficient by its table entry, push it half a unit away from zero, truncate to short.
int idx = ZigZag . ZigZagOrder [ i ] ;
float quantizedVal = block [ idx ] * qt [ idx ] ;
quantizedVal + = quantizedVal < 0 ? - 0.5f : 0.5f ;
dest [ i ] = ( short ) quantizedVal ;
}
while ( Unsafe . IsAddressLessThan ( ref aBase , ref aEnd ) ) ;
}
else
#endif
{
// Fallback: the same operation applied to each of the sixteen Vector4 fields.
a . V0L = DivideRound ( a . V0L , b . V0L ) ;
a . V0R = DivideRound ( a . V0R , b . V0R ) ;
a . V1L = DivideRound ( a . V1L , b . V1L ) ;
a . V1R = DivideRound ( a . V1R , b . V1R ) ;
a . V2L = DivideRound ( a . V2L , b . V2L ) ;
a . V2R = DivideRound ( a . V2R , b . V2R ) ;
a . V3L = DivideRound ( a . V3L , b . V3L ) ;
a . V3R = DivideRound ( a . V3R , b . V3R ) ;
a . V4L = DivideRound ( a . V4L , b . V4L ) ;
a . V4R = DivideRound ( a . V4R , b . V4R ) ;
a . V5L = DivideRound ( a . V5L , b . V5L ) ;
a . V5R = DivideRound ( a . V5R , b . V5R ) ;
a . V6L = DivideRound ( a . V6L , b . V6L ) ;
a . V6R = DivideRound ( a . V6R , b . V6R ) ;
a . V7L = DivideRound ( a . V7L , b . V7L ) ;
a . V7R = DivideRound ( a . V7R , b . V7R ) ;
}
}
/// <summary>
/// Divides <paramref name="dividend"/> by <paramref name="divisor"/> element-wise and offsets each quotient by
/// 0.5 * clamp(dividend, -1, 1), so that a later truncation behaves like rounding.
/// </summary>
/// <param name="dividend">The values to divide.</param>
/// <param name="divisor">The values to divide by.</param>
/// <returns>dividend / divisor + 0.5 * clamp(dividend, -1, 1).</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
{
    Vector4 half = new Vector4(0.5F);
    Vector4 minusOne = new Vector4(-1F);

    // Clamping the dividend to [-1, 1] stands in for sign(dividend).
    Vector4 sign = Numerics.Clamp(dividend, minusOne, Vector4.One);

    Vector4 quotient = dividend / divisor;
    return quotient + (sign * half);
}
public void RoundInto ( ref Block8x8 dest )
{
for ( int i = 0 ; i < Size ; i + + )
@ -699,6 +531,47 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
Unsafe . Add ( ref dRef , 7 ) = bottom ;
}
/// <summary>
/// Compares entire 8x8 block to a single scalar value.
/// Every coefficient is truncated to an integer before it is compared.
/// </summary>
/// <param name="value">Value to compare to.</param>
/// <returns>
/// <see langword="true"/> when every truncated coefficient equals <paramref name="value"/>; otherwise <see langword="false"/>.
/// </returns>
public bool EqualsToScalar(int value)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        // MoveMask over bytes yields one bit per byte: all 32 bits set means all eight lanes compared equal.
        const int equalityMask = unchecked((int)0b1111_1111_1111_1111_1111_1111_1111_1111);

        var targetVector = Vector256.Create(value);

        // Walk the block one 256-bit row at a time, starting at the first row.
        ref Vector256<float> blockStride = ref this.V0;

        for (int i = 0; i < RowCount; i++)
        {
            Vector256<int> truncatedRow = Avx.ConvertToVector256Int32WithTruncation(Unsafe.Add(ref blockStride, i));
            Vector256<int> areEqual = Avx2.CompareEqual(truncatedRow, targetVector);
            if (Avx2.MoveMask(areEqual.AsByte()) != equalityMask)
            {
                return false;
            }
        }

        return true;
    }
#endif

    {
        // Scalar path: visit all Size coefficients through a float view of the block.
        ref float scalars = ref Unsafe.As<Block8x8F, float>(ref this);

        for (int i = 0; i < Size; i++)
        {
            if ((int)Unsafe.Add(ref scalars, i) != value)
            {
                return false;
            }
        }

        return true;
    }
}
/// <inheritdoc />
public bool Equals ( Block8x8F other )
= > this . V0L = = other . V0L
@ -735,172 +608,89 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
return sb . ToString ( ) ;
}
/// <summary>
/// Adds <paramref name="off"/> to <paramref name="row"/>, limits the result to the range
/// from zero to <paramref name="max"/> and rounds it with FastRound.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector<float> NormalizeAndRound(Vector<float> row, Vector<float> off, Vector<float> max)
{
    Vector<float> shifted = row + off;

    // Lower bound first (zero), then upper bound (max).
    Vector<float> limited = Vector.Min(Vector.Max(shifted, Vector<float>.Zero), max);

    return limited.FastRound();
}
/// <summary>
/// Debug-only check that <paramref name="idx"/> lies inside the block, i.e. 0 &lt;= idx &lt; <see cref="Size"/>.
/// </summary>
/// <param name="idx">The coefficient index to validate.</param>
[Conditional("DEBUG")]
private static void GuardBlockIndex(int idx)
{
    // The two bounds are independent: no index can violate both, so the lower bound is checked first here.
    DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
    DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
}
// NOTE(review): this span carries two signatures (TransposeInto and TransposeInplace) and the bodies of both;
// confirm which version is intended before relying on it.
/// <summary>
/// Transpose the block into the destination block .
/// Transpose the block inplace.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto ( ref Block8x8F d )
public void TransposeInplace ( )
{
#if SUPPORTS_RUNTIME_INTRINSICS
if ( Avx . IsSupported )
{
// https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536
// r0..r3: lower 128 bits taken from V0L..V3L, upper 128 bits from V4L..V7L.
Vector256 < float > r0 = Avx . InsertVector128 (
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V0L ) . ToVector256 ( ) ,
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V4L ) ,
1 ) ;
Vector256 < float > r1 = Avx . InsertVector128 (
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V1L ) . ToVector256 ( ) ,
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V5L ) ,
1 ) ;
Vector256 < float > r2 = Avx . InsertVector128 (
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V2L ) . ToVector256 ( ) ,
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V6L ) ,
1 ) ;
Vector256 < float > r3 = Avx . InsertVector128 (
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V3L ) . ToVector256 ( ) ,
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V7L ) ,
1 ) ;
// r4..r7: lower 128 bits taken from V0R..V3R, upper 128 bits from V4R..V7R.
Vector256 < float > r4 = Avx . InsertVector128 (
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V0R ) . ToVector256 ( ) ,
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V4R ) ,
1 ) ;
Vector256 < float > r5 = Avx . InsertVector128 (
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V1R ) . ToVector256 ( ) ,
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V5R ) ,
1 ) ;
Vector256 < float > r6 = Avx . InsertVector128 (
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V2R ) . ToVector256 ( ) ,
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V6R ) ,
1 ) ;
Vector256 < float > r7 = Avx . InsertVector128 (
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V3R ) . ToVector256 ( ) ,
Unsafe . As < Vector4 , Vector128 < float > > ( ref this . V7R ) ,
1 ) ;
// Low halves of r0..r3 -> 256-bit destination rows 0 and 1 (written through V0L / V1L).
Vector256 < float > t0 = Avx . UnpackLow ( r0 , r1 ) ;
Vector256 < float > t2 = Avx . UnpackLow ( r2 , r3 ) ;
Vector256 < float > v = Avx . Shuffle ( t0 , t2 , 0x4E ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref d . V0L ) = Avx . Blend ( t0 , v , 0xCC ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref d . V1L ) = Avx . Blend ( t2 , v , 0x33 ) ;
// Low halves of r4..r7 -> destination rows 4 and 5.
Vector256 < float > t4 = Avx . UnpackLow ( r4 , r5 ) ;
Vector256 < float > t6 = Avx . UnpackLow ( r6 , r7 ) ;
v = Avx . Shuffle ( t4 , t6 , 0x4E ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref d . V4L ) = Avx . Blend ( t4 , v , 0xCC ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref d . V5L ) = Avx . Blend ( t6 , v , 0x33 ) ;
// High halves of r0..r3 -> destination rows 2 and 3.
Vector256 < float > t1 = Avx . UnpackHigh ( r0 , r1 ) ;
Vector256 < float > t3 = Avx . UnpackHigh ( r2 , r3 ) ;
v = Avx . Shuffle ( t1 , t3 , 0x4E ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref d . V2L ) = Avx . Blend ( t1 , v , 0xCC ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref d . V3L ) = Avx . Blend ( t3 , v , 0x33 ) ;
// High halves of r4..r7 -> destination rows 6 and 7.
Vector256 < float > t5 = Avx . UnpackHigh ( r4 , r5 ) ;
Vector256 < float > t7 = Avx . UnpackHigh ( r6 , r7 ) ;
v = Avx . Shuffle ( t5 , t7 , 0x4E ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref d . V6L ) = Avx . Blend ( t5 , v , 0xCC ) ;
Unsafe . As < Vector4 , Vector256 < float > > ( ref d . V7L ) = Avx . Blend ( t7 , v , 0x33 ) ;
this . TransposeInplace_Avx ( ) ;
}
else
#endif
{
// Scalar path: element k of source row n is written to element n of destination row k.
// source row 0 -> destination column 0
d . V0L . X = this . V0L . X ;
d . V1L . X = this . V0L . Y ;
d . V2L . X = this . V0L . Z ;
d . V3L . X = this . V0L . W ;
d . V4L . X = this . V0R . X ;
d . V5L . X = this . V0R . Y ;
d . V6L . X = this . V0R . Z ;
d . V7L . X = this . V0R . W ;
// source row 1 -> destination column 1
d . V0L . Y = this . V1L . X ;
d . V1L . Y = this . V1L . Y ;
d . V2L . Y = this . V1L . Z ;
d . V3L . Y = this . V1L . W ;
d . V4L . Y = this . V1R . X ;
d . V5L . Y = this . V1R . Y ;
d . V6L . Y = this . V1R . Z ;
d . V7L . Y = this . V1R . W ;
// source row 2 -> destination column 2
d . V0L . Z = this . V2L . X ;
d . V1L . Z = this . V2L . Y ;
d . V2L . Z = this . V2L . Z ;
d . V3L . Z = this . V2L . W ;
d . V4L . Z = this . V2R . X ;
d . V5L . Z = this . V2R . Y ;
d . V6L . Z = this . V2R . Z ;
d . V7L . Z = this . V2R . W ;
// source row 3 -> destination column 3
d . V0L . W = this . V3L . X ;
d . V1L . W = this . V3L . Y ;
d . V2L . W = this . V3L . Z ;
d . V3L . W = this . V3L . W ;
d . V4L . W = this . V3R . X ;
d . V5L . W = this . V3R . Y ;
d . V6L . W = this . V3R . Z ;
d . V7L . W = this . V3R . W ;
// source row 4 -> destination column 4
d . V0R . X = this . V4L . X ;
d . V1R . X = this . V4L . Y ;
d . V2R . X = this . V4L . Z ;
d . V3R . X = this . V4L . W ;
d . V4R . X = this . V4R . X ;
d . V5R . X = this . V4R . Y ;
d . V6R . X = this . V4R . Z ;
d . V7R . X = this . V4R . W ;
// source row 5 -> destination column 5
d . V0R . Y = this . V5L . X ;
d . V1R . Y = this . V5L . Y ;
d . V2R . Y = this . V5L . Z ;
d . V3R . Y = this . V5L . W ;
d . V4R . Y = this . V5R . X ;
d . V5R . Y = this . V5R . Y ;
d . V6R . Y = this . V5R . Z ;
d . V7R . Y = this . V5R . W ;
// source row 6 -> destination column 6
d . V0R . Z = this . V6L . X ;
d . V1R . Z = this . V6L . Y ;
d . V2R . Z = this . V6L . Z ;
d . V3R . Z = this . V6L . W ;
d . V4R . Z = this . V6R . X ;
d . V5R . Z = this . V6R . Y ;
d . V6R . Z = this . V6R . Z ;
d . V7R . Z = this . V6R . W ;
// source row 7 -> destination column 7
d . V0R . W = this . V7L . X ;
d . V1R . W = this . V7L . Y ;
d . V2R . W = this . V7L . Z ;
d . V3R . W = this . V7L . W ;
d . V4R . W = this . V7R . X ;
d . V5R . W = this . V7R . Y ;
d . V6R . W = this . V7R . Z ;
d . V7R . W = this . V7R . W ;
this . TransposeInplace_Scalar ( ) ;
}
}
/// <summary>
/// Scalar inplace transpose implementation for <see cref="TransposeInplace"/>
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private void TransposeInplace_Scalar()
{
    ref float origin = ref Unsafe.As<Block8x8F, float>(ref this);

    // Each call exchanges the element at flat index (8 * row) + col with the one at (8 * col) + row.
    // Only positions above the diagonal are listed; diagonal elements stay where they are.

    // row #0
    Exchange(ref origin, 1, 8);
    Exchange(ref origin, 2, 16);
    Exchange(ref origin, 3, 24);
    Exchange(ref origin, 4, 32);
    Exchange(ref origin, 5, 40);
    Exchange(ref origin, 6, 48);
    Exchange(ref origin, 7, 56);

    // row #1
    Exchange(ref origin, 10, 17);
    Exchange(ref origin, 11, 25);
    Exchange(ref origin, 12, 33);
    Exchange(ref origin, 13, 41);
    Exchange(ref origin, 14, 49);
    Exchange(ref origin, 15, 57);

    // row #2
    Exchange(ref origin, 19, 26);
    Exchange(ref origin, 20, 34);
    Exchange(ref origin, 21, 42);
    Exchange(ref origin, 22, 50);
    Exchange(ref origin, 23, 58);

    // row #3
    Exchange(ref origin, 28, 35);
    Exchange(ref origin, 29, 43);
    Exchange(ref origin, 30, 51);
    Exchange(ref origin, 31, 59);

    // row #4
    Exchange(ref origin, 37, 44);
    Exchange(ref origin, 38, 52);
    Exchange(ref origin, 39, 60);

    // row #5
    Exchange(ref origin, 46, 53);
    Exchange(ref origin, 47, 61);

    // row #6
    Exchange(ref origin, 55, 62);

    // Swaps the two floats found `upper` and `lower` elements past `cells`.
    static void Exchange(ref float cells, int upper, int lower)
    {
        ref float above = ref Unsafe.Add(ref cells, upper);
        ref float below = ref Unsafe.Add(ref cells, lower);

        float held = above;
        above = below;
        below = held;
    }
}
/// <summary>
/// Adds <paramref name="off"/> to <paramref name="row"/>, limits the result to the range
/// from zero to <paramref name="max"/> and rounds it with FastRound.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector<float> NormalizeAndRound(Vector<float> row, Vector<float> off, Vector<float> max)
{
    Vector<float> shifted = row + off;

    // Lower bound first (zero), then upper bound (max).
    Vector<float> limited = Vector.Min(Vector.Max(shifted, Vector<float>.Zero), max);

    return limited.FastRound();
}
}
}