|
|
|
@ -23,6 +23,9 @@ internal readonly unsafe struct ResizeKernel |
|
|
|
/// <summary>
|
|
|
|
/// Initializes a new instance of the <see cref="ResizeKernel"/> struct.
|
|
|
|
/// </summary>
|
|
|
|
/// <param name="startIndex">The starting index for the destination row.</param>
|
|
|
|
/// <param name="bufferPtr">The pointer to the buffer with the convolution factors.</param>
|
|
|
|
/// <param name="length">The length of the kernel.</param>
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
internal ResizeKernel(int startIndex, float* bufferPtr, int length) |
|
|
|
{ |
|
|
|
@ -31,6 +34,15 @@ internal readonly unsafe struct ResizeKernel |
|
|
|
this.Length = length; |
|
|
|
} |
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Gets a value indicating whether vectorization is supported.
|
|
|
|
/// </summary>
|
|
|
|
public static bool SupportsVectorization |
|
|
|
{ |
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|
|
|
get => Vector256.IsHardwareAccelerated; |
|
|
|
} |
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Gets the start index for the destination row.
|
|
|
|
/// </summary>
|
|
|
|
@ -80,96 +92,99 @@ internal readonly unsafe struct ResizeKernel |
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
public Vector4 ConvolveCore(ref Vector4 rowStartRef) |
|
|
|
{ |
|
|
|
if (Vector512.IsHardwareAccelerated) |
|
|
|
if (SupportsVectorization) |
|
|
|
{ |
|
|
|
float* bufferStart = this.bufferPtr; |
|
|
|
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~7); |
|
|
|
Vector512<float> result512_0 = Vector512<float>.Zero; |
|
|
|
Vector512<float> result512_1 = Vector512<float>.Zero; |
|
|
|
|
|
|
|
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) |
|
|
|
if (Vector512.IsHardwareAccelerated) |
|
|
|
{ |
|
|
|
Vector512<float> pixels512_0 = Unsafe.As<Vector4, Vector512<float>>(ref rowStartRef); |
|
|
|
Vector512<float> pixels512_1 = Unsafe.As<Vector4, Vector512<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)4)); |
|
|
|
float* bufferStart = this.bufferPtr; |
|
|
|
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~7); |
|
|
|
Vector512<float> result512_0 = Vector512<float>.Zero; |
|
|
|
Vector512<float> result512_1 = Vector512<float>.Zero; |
|
|
|
|
|
|
|
result512_0 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart), pixels512_0, result512_0); |
|
|
|
result512_1 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart + 16), pixels512_1, result512_1); |
|
|
|
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) |
|
|
|
{ |
|
|
|
Vector512<float> pixels512_0 = Unsafe.As<Vector4, Vector512<float>>(ref rowStartRef); |
|
|
|
Vector512<float> pixels512_1 = Unsafe.As<Vector4, Vector512<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)4)); |
|
|
|
|
|
|
|
bufferStart += 32; |
|
|
|
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)8); |
|
|
|
} |
|
|
|
result512_0 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart), pixels512_0, result512_0); |
|
|
|
result512_1 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart + 16), pixels512_1, result512_1); |
|
|
|
|
|
|
|
result512_0 += result512_1; |
|
|
|
bufferStart += 32; |
|
|
|
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)8); |
|
|
|
} |
|
|
|
|
|
|
|
if ((this.Length & 7) >= 4) |
|
|
|
{ |
|
|
|
Vector512<float> pixels512_0 = Unsafe.As<Vector4, Vector512<float>>(ref rowStartRef); |
|
|
|
result512_0 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart), pixels512_0, result512_0); |
|
|
|
result512_0 += result512_1; |
|
|
|
|
|
|
|
bufferStart += 16; |
|
|
|
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); |
|
|
|
} |
|
|
|
if ((this.Length & 7) >= 4) |
|
|
|
{ |
|
|
|
Vector512<float> pixels512_0 = Unsafe.As<Vector4, Vector512<float>>(ref rowStartRef); |
|
|
|
result512_0 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart), pixels512_0, result512_0); |
|
|
|
|
|
|
|
Vector256<float> result256 = result512_0.GetLower() + result512_0.GetUpper(); |
|
|
|
bufferStart += 16; |
|
|
|
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); |
|
|
|
} |
|
|
|
|
|
|
|
if ((this.Length & 3) >= 2) |
|
|
|
{ |
|
|
|
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef); |
|
|
|
result256 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256); |
|
|
|
Vector256<float> result256 = result512_0.GetLower() + result512_0.GetUpper(); |
|
|
|
|
|
|
|
bufferStart += 8; |
|
|
|
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); |
|
|
|
} |
|
|
|
if ((this.Length & 3) >= 2) |
|
|
|
{ |
|
|
|
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef); |
|
|
|
result256 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256); |
|
|
|
|
|
|
|
Vector128<float> result128 = result256.GetLower() + result256.GetUpper(); |
|
|
|
bufferStart += 8; |
|
|
|
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); |
|
|
|
} |
|
|
|
|
|
|
|
if ((this.Length & 1) != 0) |
|
|
|
{ |
|
|
|
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef); |
|
|
|
result128 = Vector128Utilities.MultiplyAddEstimate(Vector128.Load(bufferStart), pixels128, result128); |
|
|
|
} |
|
|
|
Vector128<float> result128 = result256.GetLower() + result256.GetUpper(); |
|
|
|
|
|
|
|
return *(Vector4*)&result128; |
|
|
|
} |
|
|
|
else if (Vector256.IsHardwareAccelerated) |
|
|
|
{ |
|
|
|
float* bufferStart = this.bufferPtr; |
|
|
|
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3); |
|
|
|
Vector256<float> result256_0 = Vector256<float>.Zero; |
|
|
|
Vector256<float> result256_1 = Vector256<float>.Zero; |
|
|
|
if ((this.Length & 1) != 0) |
|
|
|
{ |
|
|
|
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef); |
|
|
|
result128 = Vector128Utilities.MultiplyAddEstimate(Vector128.Load(bufferStart), pixels128, result128); |
|
|
|
} |
|
|
|
|
|
|
|
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) |
|
|
|
return *(Vector4*)&result128; |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef); |
|
|
|
Vector256<float> pixels256_1 = Unsafe.As<Vector4, Vector256<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)2)); |
|
|
|
float* bufferStart = this.bufferPtr; |
|
|
|
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3); |
|
|
|
Vector256<float> result256_0 = Vector256<float>.Zero; |
|
|
|
Vector256<float> result256_1 = Vector256<float>.Zero; |
|
|
|
|
|
|
|
result256_0 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256_0); |
|
|
|
result256_1 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart + 8), pixels256_1, result256_1); |
|
|
|
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) |
|
|
|
{ |
|
|
|
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef); |
|
|
|
Vector256<float> pixels256_1 = Unsafe.As<Vector4, Vector256<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)2)); |
|
|
|
|
|
|
|
bufferStart += 16; |
|
|
|
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); |
|
|
|
} |
|
|
|
result256_0 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256_0); |
|
|
|
result256_1 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart + 8), pixels256_1, result256_1); |
|
|
|
|
|
|
|
result256_0 += result256_1; |
|
|
|
bufferStart += 16; |
|
|
|
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); |
|
|
|
} |
|
|
|
|
|
|
|
if ((this.Length & 3) >= 2) |
|
|
|
{ |
|
|
|
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef); |
|
|
|
result256_0 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256_0); |
|
|
|
result256_0 += result256_1; |
|
|
|
|
|
|
|
bufferStart += 8; |
|
|
|
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); |
|
|
|
} |
|
|
|
if ((this.Length & 3) >= 2) |
|
|
|
{ |
|
|
|
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef); |
|
|
|
result256_0 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256_0); |
|
|
|
|
|
|
|
Vector128<float> result128 = result256_0.GetLower() + result256_0.GetUpper(); |
|
|
|
bufferStart += 8; |
|
|
|
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); |
|
|
|
} |
|
|
|
|
|
|
|
if ((this.Length & 1) != 0) |
|
|
|
{ |
|
|
|
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef); |
|
|
|
result128 = Vector128Utilities.MultiplyAddEstimate(Vector128.Load(bufferStart), pixels128, result128); |
|
|
|
} |
|
|
|
Vector128<float> result128 = result256_0.GetLower() + result256_0.GetUpper(); |
|
|
|
|
|
|
|
if ((this.Length & 1) != 0) |
|
|
|
{ |
|
|
|
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef); |
|
|
|
result128 = Vector128Utilities.MultiplyAddEstimate(Vector128.Load(bufferStart), pixels128, result128); |
|
|
|
} |
|
|
|
|
|
|
|
return *(Vector4*)&result128; |
|
|
|
return *(Vector4*)&result128; |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
@ -195,6 +210,7 @@ internal readonly unsafe struct ResizeKernel |
|
|
|
/// Copy the contents of <see cref="ResizeKernel"/> altering <see cref="StartIndex"/>
|
|
|
|
/// to the value <paramref name="left"/>.
|
|
|
|
/// </summary>
|
|
|
|
/// <param name="left">The new value for <see cref="StartIndex"/>.</param>
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
internal ResizeKernel AlterLeftValue(int left) |
|
|
|
=> new(left, this.bufferPtr, this.Length); |
|
|
|
|