diff --git a/src/ImageSharp/Common/Tuples/Tuple8.cs b/src/ImageSharp/Common/Tuples/Tuple8.cs
new file mode 100644
index 000000000..c0b9a9002
--- /dev/null
+++ b/src/ImageSharp/Common/Tuples/Tuple8.cs
@@ -0,0 +1,251 @@
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp.Common.Tuples
+{
+ /// <summary>
+ /// Contains value type tuples of 8 elements.
+ /// TODO: Should generate these structs with T4 templates to stay DRY
+ /// </summary>
+ internal static class Tuple8
+ {
+ /// <summary>
+ /// Value type tuple of 8 <see cref="float"/>-s, stored at consecutive <c>sizeof(float)</c> offsets.
+ /// </summary>
+ [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(float))]
+ public struct OfSingle
+ {
+ [FieldOffset(0 * sizeof(float))]
+ public float V0;
+
+ [FieldOffset(1 * sizeof(float))]
+ public float V1;
+
+ [FieldOffset(2 * sizeof(float))]
+ public float V2;
+
+ [FieldOffset(3 * sizeof(float))]
+ public float V3;
+
+ [FieldOffset(4 * sizeof(float))]
+ public float V4;
+
+ [FieldOffset(5 * sizeof(float))]
+ public float V5;
+
+ [FieldOffset(6 * sizeof(float))]
+ public float V6;
+
+ [FieldOffset(7 * sizeof(float))]
+ public float V7;
+
+ public override string ToString() // formats the elements as "[V0,V1,...,V7]"
+ {
+ return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+ }
+ }
+
+ /// <summary>
+ /// Value type tuple of 8 <see cref="int"/>-s, stored at consecutive <c>sizeof(int)</c> offsets.
+ /// </summary>
+ [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(int))]
+ public struct OfInt32
+ {
+ [FieldOffset(0 * sizeof(int))]
+ public int V0;
+
+ [FieldOffset(1 * sizeof(int))]
+ public int V1;
+
+ [FieldOffset(2 * sizeof(int))]
+ public int V2;
+
+ [FieldOffset(3 * sizeof(int))]
+ public int V3;
+
+ [FieldOffset(4 * sizeof(int))]
+ public int V4;
+
+ [FieldOffset(5 * sizeof(int))]
+ public int V5;
+
+ [FieldOffset(6 * sizeof(int))]
+ public int V6;
+
+ [FieldOffset(7 * sizeof(int))]
+ public int V7;
+
+ public override string ToString() // formats the elements as "[V0,V1,...,V7]"
+ {
+ return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+ }
+ }
+
+ /// <summary>
+ /// Value type tuple of 8 <see cref="uint"/>-s, stored at consecutive <c>sizeof(uint)</c> offsets.
+ /// </summary>
+ [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))]
+ public struct OfUInt32
+ {
+ [FieldOffset(0 * sizeof(uint))]
+ public uint V0;
+
+ [FieldOffset(1 * sizeof(uint))]
+ public uint V1;
+
+ [FieldOffset(2 * sizeof(uint))]
+ public uint V2;
+
+ [FieldOffset(3 * sizeof(uint))]
+ public uint V3;
+
+ [FieldOffset(4 * sizeof(uint))]
+ public uint V4;
+
+ [FieldOffset(5 * sizeof(uint))]
+ public uint V5;
+
+ [FieldOffset(6 * sizeof(uint))]
+ public uint V6;
+
+ [FieldOffset(7 * sizeof(uint))]
+ public uint V7;
+
+ public override string ToString() // formats the elements as "[V0,V1,...,V7]"
+ {
+ return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+ }
+
+ public void LoadFrom(ref OfUInt16 i) // widens each ushort element of i into the matching uint field
+ {
+ this.V0 = i.V0;
+ this.V1 = i.V1;
+ this.V2 = i.V2;
+ this.V3 = i.V3;
+ this.V4 = i.V4;
+ this.V5 = i.V5;
+ this.V6 = i.V6;
+ this.V7 = i.V7;
+ }
+ }
+
+ /// <summary>
+ /// Value type tuple of 8 <see cref="ushort"/>-s, stored at consecutive <c>sizeof(ushort)</c> offsets.
+ /// </summary>
+ [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(ushort))]
+ public struct OfUInt16
+ {
+ [FieldOffset(0 * sizeof(ushort))]
+ public ushort V0;
+
+ [FieldOffset(1 * sizeof(ushort))]
+ public ushort V1;
+
+ [FieldOffset(2 * sizeof(ushort))]
+ public ushort V2;
+
+ [FieldOffset(3 * sizeof(ushort))]
+ public ushort V3;
+
+ [FieldOffset(4 * sizeof(ushort))]
+ public ushort V4;
+
+ [FieldOffset(5 * sizeof(ushort))]
+ public ushort V5;
+
+ [FieldOffset(6 * sizeof(ushort))]
+ public ushort V6;
+
+ [FieldOffset(7 * sizeof(ushort))]
+ public ushort V7;
+
+ public override string ToString() // formats the elements as "[V0,V1,...,V7]"
+ {
+ return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+ }
+ }
+
+ /// <summary>
+ /// Value type tuple of 8 <see cref="short"/>-s, stored at consecutive <c>sizeof(short)</c> offsets.
+ /// </summary>
+ [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(short))]
+ public struct OfInt16
+ {
+ [FieldOffset(0 * sizeof(short))]
+ public short V0;
+
+ [FieldOffset(1 * sizeof(short))]
+ public short V1;
+
+ [FieldOffset(2 * sizeof(short))]
+ public short V2;
+
+ [FieldOffset(3 * sizeof(short))]
+ public short V3;
+
+ [FieldOffset(4 * sizeof(short))]
+ public short V4;
+
+ [FieldOffset(5 * sizeof(short))]
+ public short V5;
+
+ [FieldOffset(6 * sizeof(short))]
+ public short V6;
+
+ [FieldOffset(7 * sizeof(short))]
+ public short V7;
+
+ public override string ToString() // formats the elements as "[V0,V1,...,V7]"
+ {
+ return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+ }
+ }
+
+ /// <summary>
+ /// Value type tuple of 8 <see cref="byte"/>-s, stored at byte offsets 0 through 7.
+ /// </summary>
+ [StructLayout(LayoutKind.Explicit, Size = 8)]
+ public struct OfByte
+ {
+ [FieldOffset(0)]
+ public byte V0;
+
+ [FieldOffset(1)]
+ public byte V1;
+
+ [FieldOffset(2)]
+ public byte V2;
+
+ [FieldOffset(3)]
+ public byte V3;
+
+ [FieldOffset(4)]
+ public byte V4;
+
+ [FieldOffset(5)]
+ public byte V5;
+
+ [FieldOffset(6)]
+ public byte V6;
+
+ [FieldOffset(7)]
+ public byte V7;
+
+ public override string ToString() // formats the elements as "[V0,V1,...,V7]"
+ {
+ return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+ }
+
+ public void LoadFrom(ref OfUInt32 i) // narrows each uint element of i with an explicit (byte) cast
+ {
+ this.V0 = (byte)i.V0;
+ this.V1 = (byte)i.V1;
+ this.V2 = (byte)i.V2;
+ this.V3 = (byte)i.V3;
+ this.V4 = (byte)i.V4;
+ this.V5 = (byte)i.V5;
+ this.V6 = (byte)i.V6;
+ this.V7 = (byte)i.V7;
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ImageSharp/Common/Tuples/Vector4Pair.cs b/src/ImageSharp/Common/Tuples/Vector4Pair.cs
new file mode 100644
index 000000000..1be936b30
--- /dev/null
+++ b/src/ImageSharp/Common/Tuples/Vector4Pair.cs
@@ -0,0 +1,73 @@
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp.Common.Tuples
+{
+ /// <summary>
+ /// It's faster to process multiple Vector4-s together, so let's pair them!
+ /// On AVX2 this pair should be convertible to <see cref="Vector{T}"/> of <see cref="float"/>!
+ /// </summary>
+ [StructLayout(LayoutKind.Sequential)]
+ internal struct Vector4Pair
+ {
+ public Vector4 A;
+
+ public Vector4 B;
+
+ private static readonly Vector4 Scale = new Vector4(1 / 255f);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void MultiplyInplace(float value) // multiplies both A and B by value
+ {
+ this.A *= value;
+ this.B *= value;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void AddInplace(Vector4 value) // adds the same value to both A and B
+ {
+ this.A += value;
+ this.B += value;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void AddInplace(ref Vector4Pair other) // adds other.A to A and other.B to B
+ {
+ this.A += other.A;
+ this.B += other.B;
+ }
+
+ /// <summary>
+ /// Color-conversion specific downscale method: applies FastRound to A and B separately, then scales both by 1/255.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal void RoundAndDownscaleBasic()
+ {
+ ref Vector<float> a = ref Unsafe.As<Vector4, Vector<float>>(ref this.A); // presumably requires Vector<float>.Count == 4 - TODO confirm callers check this
+ a = a.FastRound();
+
+ ref Vector<float> b = ref Unsafe.As<Vector4, Vector<float>>(ref this.B);
+ b = b.FastRound();
+
+ // Downscale by 1/255
+ this.A *= Scale;
+ this.B *= Scale;
+ }
+
+ /// <summary>
+ /// AVX2-only color-conversion specific downscale method: treats the whole pair as one <see cref="Vector{T}"/> of <see cref="float"/>.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal void RoundAndDownscaleAvx2()
+ {
+ ref Vector<float> self = ref Unsafe.As<Vector4Pair, Vector<float>>(ref this); // presumably requires Vector<float>.Count == 8 - TODO confirm callers check this
+ Vector<float> v = self;
+ v = v.FastRound();
+
+ // Downscale by 1/255
+ v *= new Vector<float>(1 / 255f);
+ self = v;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ImageSharp/Formats/Jpeg/Common/Block8x8.cs b/src/ImageSharp/Formats/Jpeg/Common/Block8x8.cs
index 3f4c69c3e..1291f160a 100644
--- a/src/ImageSharp/Formats/Jpeg/Common/Block8x8.cs
+++ b/src/ImageSharp/Formats/Jpeg/Common/Block8x8.cs
@@ -176,17 +176,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
}
/// <summary>
- /// Convert into <see cref="Block8x8F"/>
+ /// Convert to <see cref="Block8x8F"/>
/// </summary>
public Block8x8F AsFloatBlock()
{
- // TODO: Optimize this
var result = default(Block8x8F);
- for (int i = 0; i < Size; i++)
- {
- result[i] = this[i];
- }
-
+ this.CopyToFloatBlock(ref result);
return result;
}
@@ -302,5 +297,85 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
return result;
}
+
+ /// <summary>
+ /// Convert the 64 <see cref="short"/> values of this block into <paramref name="dest"/> as <see cref="float"/>-s
+ /// </summary>
+ public void CopyToFloatBlock(ref Block8x8F dest)
+ {
+ ref short selfRef = ref Unsafe.As<Block8x8, short>(ref this); // flat view of this block; assumes it stores 64 contiguous shorts - TODO confirm
+
+ dest.V0L.X = Unsafe.Add(ref selfRef, 0);
+ dest.V0L.Y = Unsafe.Add(ref selfRef, 1);
+ dest.V0L.Z = Unsafe.Add(ref selfRef, 2);
+ dest.V0L.W = Unsafe.Add(ref selfRef, 3);
+ dest.V0R.X = Unsafe.Add(ref selfRef, 4);
+ dest.V0R.Y = Unsafe.Add(ref selfRef, 5);
+ dest.V0R.Z = Unsafe.Add(ref selfRef, 6);
+ dest.V0R.W = Unsafe.Add(ref selfRef, 7);
+
+ dest.V1L.X = Unsafe.Add(ref selfRef, 8);
+ dest.V1L.Y = Unsafe.Add(ref selfRef, 9);
+ dest.V1L.Z = Unsafe.Add(ref selfRef, 10);
+ dest.V1L.W = Unsafe.Add(ref selfRef, 11);
+ dest.V1R.X = Unsafe.Add(ref selfRef, 12);
+ dest.V1R.Y = Unsafe.Add(ref selfRef, 13);
+ dest.V1R.Z = Unsafe.Add(ref selfRef, 14);
+ dest.V1R.W = Unsafe.Add(ref selfRef, 15);
+
+ dest.V2L.X = Unsafe.Add(ref selfRef, 16);
+ dest.V2L.Y = Unsafe.Add(ref selfRef, 17);
+ dest.V2L.Z = Unsafe.Add(ref selfRef, 18);
+ dest.V2L.W = Unsafe.Add(ref selfRef, 19);
+ dest.V2R.X = Unsafe.Add(ref selfRef, 20);
+ dest.V2R.Y = Unsafe.Add(ref selfRef, 21);
+ dest.V2R.Z = Unsafe.Add(ref selfRef, 22);
+ dest.V2R.W = Unsafe.Add(ref selfRef, 23);
+
+ dest.V3L.X = Unsafe.Add(ref selfRef, 24);
+ dest.V3L.Y = Unsafe.Add(ref selfRef, 25);
+ dest.V3L.Z = Unsafe.Add(ref selfRef, 26);
+ dest.V3L.W = Unsafe.Add(ref selfRef, 27);
+ dest.V3R.X = Unsafe.Add(ref selfRef, 28);
+ dest.V3R.Y = Unsafe.Add(ref selfRef, 29);
+ dest.V3R.Z = Unsafe.Add(ref selfRef, 30);
+ dest.V3R.W = Unsafe.Add(ref selfRef, 31);
+
+ dest.V4L.X = Unsafe.Add(ref selfRef, 32);
+ dest.V4L.Y = Unsafe.Add(ref selfRef, 33);
+ dest.V4L.Z = Unsafe.Add(ref selfRef, 34);
+ dest.V4L.W = Unsafe.Add(ref selfRef, 35);
+ dest.V4R.X = Unsafe.Add(ref selfRef, 36);
+ dest.V4R.Y = Unsafe.Add(ref selfRef, 37);
+ dest.V4R.Z = Unsafe.Add(ref selfRef, 38);
+ dest.V4R.W = Unsafe.Add(ref selfRef, 39);
+
+ dest.V5L.X = Unsafe.Add(ref selfRef, 40);
+ dest.V5L.Y = Unsafe.Add(ref selfRef, 41);
+ dest.V5L.Z = Unsafe.Add(ref selfRef, 42);
+ dest.V5L.W = Unsafe.Add(ref selfRef, 43);
+ dest.V5R.X = Unsafe.Add(ref selfRef, 44);
+ dest.V5R.Y = Unsafe.Add(ref selfRef, 45);
+ dest.V5R.Z = Unsafe.Add(ref selfRef, 46);
+ dest.V5R.W = Unsafe.Add(ref selfRef, 47);
+
+ dest.V6L.X = Unsafe.Add(ref selfRef, 48);
+ dest.V6L.Y = Unsafe.Add(ref selfRef, 49);
+ dest.V6L.Z = Unsafe.Add(ref selfRef, 50);
+ dest.V6L.W = Unsafe.Add(ref selfRef, 51);
+ dest.V6R.X = Unsafe.Add(ref selfRef, 52);
+ dest.V6R.Y = Unsafe.Add(ref selfRef, 53);
+ dest.V6R.Z = Unsafe.Add(ref selfRef, 54);
+ dest.V6R.W = Unsafe.Add(ref selfRef, 55);
+
+ dest.V7L.X = Unsafe.Add(ref selfRef, 56);
+ dest.V7L.Y = Unsafe.Add(ref selfRef, 57);
+ dest.V7L.Z = Unsafe.Add(ref selfRef, 58);
+ dest.V7L.W = Unsafe.Add(ref selfRef, 59);
+ dest.V7R.X = Unsafe.Add(ref selfRef, 60);
+ dest.V7R.Y = Unsafe.Add(ref selfRef, 61);
+ dest.V7R.Z = Unsafe.Add(ref selfRef, 62);
+ dest.V7R.W = Unsafe.Add(ref selfRef, 63);
+ }
}
}
\ No newline at end of file
diff --git a/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs
index aecd5c59e..4ffb63480 100644
--- a/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs
+++ b/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs
@@ -556,7 +556,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
}
}
- private void RoundInplaceSlow()
+ internal void RoundInplaceSlow()
{
for (int i = 0; i < Size; i++)
{
diff --git a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs
index 7139260ec..2db869de7 100644
--- a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs
+++ b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs
@@ -42,7 +42,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder
ref Block8x8 sourceBlock,
BufferArea destArea)
{
- this.data.SourceBlock = sourceBlock.AsFloatBlock();
+ sourceBlock.CopyToFloatBlock(ref this.data.SourceBlock);
Block8x8F* b = this.pointers.SourceBlock;
diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
index e5f2fd5e7..15a794144 100644
--- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
+++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
@@ -8,6 +8,8 @@ namespace SixLabors.ImageSharp.Tests.Common
using System.Linq;
using System.Runtime.CompilerServices;
+ using SixLabors.ImageSharp.Common.Tuples;
+
using Xunit.Abstractions;
using Xunit.Sdk;
@@ -243,15 +245,15 @@ namespace SixLabors.ImageSharp.Tests.Common
x = (x * scale) + magick;
- SimdUtils.Octet.OfUInt32 ii = default(SimdUtils.Octet.OfUInt32);
+ Tuple8.OfUInt32 ii = default(Tuple8.OfUInt32);
- ref Vector iiRef = ref Unsafe.As>(ref ii);
+ ref Vector iiRef = ref Unsafe.As>(ref ii);
iiRef = x;
- //SimdUtils.Octet.OfUInt32 ii = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x);
+ //Tuple8.OfUInt32 ii = Unsafe.As, Tuple8.OfUInt32>(ref x);
- ref SimdUtils.Octet.OfByte d = ref dest.NonPortableCast()[0];
+ ref Tuple8.OfByte d = ref dest.NonPortableCast()[0];
d.LoadFrom(ref ii);
this.Output.WriteLine(ii.ToString());